From 62c2520e1931a1235034145fa7c02223f341191f Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Tue, 28 Mar 2023 10:50:41 +1100 Subject: [PATCH 01/26] removed the renaming of 'study_region' to 'City' in the city data when collating urban covariates to simplify write up of data dictionary for grid and city datasets, as per #230 and #231) --- process/subprocesses/_09_urban_covariates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/process/subprocesses/_09_urban_covariates.py b/process/subprocesses/_09_urban_covariates.py index 8a132c5b..5d393345 100644 --- a/process/subprocesses/_09_urban_covariates.py +++ b/process/subprocesses/_09_urban_covariates.py @@ -69,7 +69,7 @@ def main(): SELECT '{continent}'::text "Continent", '{country}'::text "Country", '{country_code}'::text "ISO 3166-1 alpha-2", - u.study_region "City", + u.study_region, u.area_sqkm "Area (sqkm)", u.pop_est "Population estimate", u.pop_per_sqkm "Population per sqkm", From cfdb184ae1e4444b1cf88072ba278c900f8fbd73 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Tue, 28 Mar 2023 11:10:13 +1100 Subject: [PATCH 02/26] added basic data dictionary for output as per #230 to assets folder that can be copied for regions --- .../assets/output_data_dictionary.csv | 35 ++++++++++++++++++ .../assets/output_data_dictionary.xlsx | Bin 0 -> 13964 bytes 2 files changed, 35 insertions(+) create mode 100644 process/configuration/assets/output_data_dictionary.csv create mode 100644 process/configuration/assets/output_data_dictionary.xlsx diff --git a/process/configuration/assets/output_data_dictionary.csv b/process/configuration/assets/output_data_dictionary.csv new file mode 100644 index 00000000..5150e8db --- /dev/null +++ b/process/configuration/assets/output_data_dictionary.csv @@ -0,0 +1,35 @@ +Type of measure,Measure or data item description,City data,Grid data +Study region information,Continent,Continent,Continent +Study region information,Country,Country,Country +Study region information,2-letter country code,ISO 3166-1 alpha-2,ISO 3166-1 alpha-2 +Study region information,Study region,City,study_region +Derived study region statistics,"Area (km²; accounting for urban restrictions, if applied)",Area (sqkm),Area (sqkm) +Derived study region statistics,"Population estimate, as per configured population data source",Population estimate,Population estimate +Derived study region statistics,Population per km²,Population per sqkm,Population per sqkm +Derived study region statistics,Intersection count (following consolidation based on intersection tolerance parameter in region configuration),Intersections,Intersections +Derived study region statistics,Intersections per km²,Intersections per sqkm,Intersections per sqkm +,Linked urban covariates (e.g. 
estimates for 2015 from GHSL UCDB on air pollution),,
+Linked covariates,"Total emission of CO 2 from the transport sector, using non-short-cycle-organic fuels in 2015",E_EC2E_T15,E_EC2E_T15
+Linked covariates,"Total emission of CO 2 from the energy sector, using short-cycle-organic fuels in 2015",E_EC2O_T15,E_EC2O_T15
+Linked covariates,Total emission of PM 2.5 from the transport sector in 2015,E_EPM2_T15,E_EPM2_T15
+Linked covariates,Total concentration of PM 2.5 for reference epoch 2014,E_CPM2_T14,E_CPM2_T14
+Analytical statistic,Sample points used in this analysis (generated along pedestrian network for populated grid areas),urban_sample_point_count,urban_sample_point_count
+,"Access (for city, population percentage /100; for grid, an access score /1) within 500 metres to:",,
+Indicator estimates,fresh food market / supermarket (source: OpenStreetMap or custom),pop_pct_access_500m_fresh_food_market_score,access_500m_fresh_food_market_score
+Indicator estimates,convenience store (source: OpenStreetMap or custom),pop_pct_access_500m_convenience_score,access_500m_convenience_score
+Indicator estimates,public transport (source: OpenStreetMap or custom),pop_pct_access_500m_pt_osm_any_score,access_500m_pt_osm_any_score
+Indicator estimates,any public open space (source: OpenStreetMap),pop_pct_access_500m_public_open_space_any_score,access_500m_public_open_space_any_score
+Indicator estimates,public open space larger than 1.5 hectares (source: OpenStreetMap),pop_pct_access_500m_public_open_space_large_score,access_500m_public_open_space_large_score
+Indicator estimates,public transport (source: GTFS),pop_pct_access_500m_pt_gtfs_any_score,access_500m_pt_gtfs_any_score
+Indicator estimates,public transport with average daytime weekday service frequency of 30 minutes or better (source: GTFS),pop_pct_access_500m_pt_gtfs_freq_30_score,access_500m_pt_gtfs_freq_30_score
+Indicator estimates,public transport with average daytime weekday service frequency of 20 minutes or better (source: GTFS),pop_pct_access_500m_pt_gtfs_freq_20_score,access_500m_pt_gtfs_freq_20_score
+Indicator estimates,any public transport stop (source: GTFS or OpenStreetMap/custom),pop_pct_access_500m_pt_any_score,access_500m_pt_any_score
+Indicator estimates,Average walkable neighbourhood population density (population weighted) *,pop_nh_pop_density,
+Indicator estimates,Average walkable neighbourhood intersection density (population weighted) *,pop_nh_intersection_density,
+Indicator estimates,Average daily living score (population weighted) *,pop_daily_living,
+Indicator estimates,Average walkability (population weighted) *,pop_walkability,
+Indicator estimates,Average walkable neighbourhood population density ,local_nh_population_density,local_nh_population_density
+Indicator estimates,Average walkable neighbourhood intersection density ,local_nh_intersection_density,local_nh_intersection_density
+Indicator estimates,Average daily living score ,local_daily_living,local_daily_living
+Indicator estimates,Average walkability ,local_walkability,local_walkability
+,,,
diff --git a/process/configuration/assets/output_data_dictionary.xlsx b/process/configuration/assets/output_data_dictionary.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..845a4eebca74b9c3ac284d80bb5ba4f916e2c1be
GIT binary patch
literal 13964
zcmeHu^J67xw{_4lI_$V(+qTiMJ9fvm)3Hy|F*>$w+qP|6UuNdLcV>F#`wQ+{Kh#m}
zRdt@)>skBRdzY*v=m!)aa3Dw^ARv4oH$XqWD=-j{4;T;-G7u!lS3xUFdqYcmZABMr
zLpx0xXA5(@%nu-BKY>8rkN@xXe|Q9nV}~p|=@5kPems7Ru0y4_ms3LZ5yl<*1oa3^
zl1B6?*un)I+G{P?CBM*_%=b&?e45*-lo1zpSu%$jKi`%)3HUc{A|1QDaB9X2?x%h3 z&=2BjC8oys$XI;zpI6r4jIll@SXcK*DGI#`OAJ$*pyZDF?WA`4iy(Rh$h-caRfM{% zSn~ojRDFnZoi+`cIERC-DJFN(zPg+pSXPa0wj8U;mm)hei=ouZvNgQn6&pkKAK-6t zUnsW^;qjhS1(sFF<3T40AV;v#BioiU^2Ti9cT&s2#t7FnYFtG%d;qmGnh9_4TLn}Bd%j7PaKkYr4P+9J?OQ8{ijOXHpzBnSo@ z>_CMPCxXj%84YK(QwaFVk5t-d4LfE}O1lp#95VA7mcc>jyR3ow1+ ziw zW3f&j0`tP7q8r9=ik*!48=jEj&CKsK#Bbi{4x-9EzT6EO-`G@t*rKanOMEOO*7&i5 z6R?IUJHpg6DhA1E2b#%q-~7Ynr=4@d*WUu8!5%)d1W`>V>s+P0^ znRoOR-1?&`W2{J{tCf%Mqwr74@T9*fwSxiyaXF|Pj__4;YOw*Gu!28EVmD5Vy85zN<8wSVbsqx6WGjcM>Xr$SVPY6YJ7aD z8=~Hli45cZ1~eYjpe3;Yo=biSm{!#&k7FvYGHUKc%5~Qdi9^;10wAcV=`p+?{yD^5pLV6o1!7+lKvMwO}M%fV~rvyrAB34 z{3%}v`QaOKsfD=auj`sv4%NPzOM=LftYs0lty}8+v-L&WNZ+|S1r43kteOUqz9T9F z&cP*-sziL@KYG3t?z$e=8~n7k;D0?KBb8PCNqdeek^*rrx2&sXic=N<2~EbXMJdC% zjB)cS5StJaH(?Z|1+$?GS2~P+WvVneB2pat0i|j*6(S8!bD*lJAw=+Sw==Bb=Th0OI zd#}`GzRtFFV=>;KVmA9gv;H&l8zk2XoW}yIpY}*t(g}+q3B(oA&fk&&utveH9BUZXFz-^3gpuJbwV zX3-<9P;g2j`LG8a$5h3 zcVL>l&;C`YMU4xBxf%h5)H^Bp>?;a}jC6c>RCEFZpJa#Jzys)~ zN<(4Wy(c5a2+YbQ5V~%d3}7-HrE_2~wHwD7{GzrxRK4x}_+8<+#J9Sx4ljw^Xy>g# zaOoC2-PZ+o;tB&woD0qcXkvdFpYpy#_!jcA`Pt};%cxmL)+6i=c9G=gLS}CF0DHW^ zeoTr<^~})|3kiJLc%*d^(vLonW2eyAOxEaE8{R0yi^8iT4N`3HTU|du7)wUTUB|l zSe`li8Va^0z0Hf2lcI%@*qc}^|BHmAS#rT1hL*2EuM*sJy#g6^VvkD8jZ~J;PO6x) z*8UTr41?TM8r@7CouV8hJY~-vlIfBllj5+Rr$R`th6!NJH%990HoWVHIYI^Ek8&BD zSfqix$R+h}wFTv;3J8qS(wJokXZw0nCED!b9W@Mv^Su*K(KpIQ)EFm+D$CZ}5Z6&|sLonnM*VN(OGQ`j#!LQ{onRoNG8 z51uZ%7&MGKtT@tlKxFDmLQKfy8zFOx$TC*lXPL)80*hXmN=p?);8yqZ24j|ERYX!R zx%)%cgH^+kedBb@nlAmpvk8bK$$jEC>#RqGP!&+hVd~l(DI5KiXL10SRiHX3fIhN566-O=R-ph1PcX_!8)9-) z>ft=a{7z(crSKrLYu;-^YI-iGHcKa;$zCyvr^#R^no?k5z;V= zwC}dp;C*rTMRC|$$#tXN+wVT~`>r_gi{;Y0LzL<51J#HR@Q8^!_JvBn#c=T zF0Qs_i-o4JDh!4$d;q_C&L7J*O3Qyaw{2N{ygG4TZfkqdYR1K!q6$dgo9pm$wM_0A z8u3zP4IL`mxV_yPx_PRz$yR%&KY6%(zOnMcp_x-DQ}?Jm+O2Q<5Hr@XGd^#$@kW0? 
z5vkv_K+7BPv_mW|Ew}k54Oj`0;OHf}3vPKO92Cxhw`X>v zB5iMy{M>)g>9aY1da&1swja?XO!?$O@UJ^!w z8BDFT1X|fTu_FY@7c6uFipE<>0iihckFNkoFs^)kUvzq&-p|KfAPW|cQAma#3dUKy zMKfP(3hisCOreoA0;@V1Yw+NSd)6jhPCwsxhzXsC!lDN`&oo5-wHvWgOmz;)7F?DFKk05e4g7!mIt1ae;+noWKf_tJg0rK)e0r3Wk_knIR0ADflh1*Z z7_IvFpNLjD=gc|IMCF!Vw&iOP&DP5SxVI1rte zO)CWV!^ccvQgk)<{8*BwwSo-k=W;Gj>*jo@3EDeucM#+-s$9z7kc#qRc&T{Z*PWkZ zyBIi5x8^5I5Y@>-rG+X!3dLrV$7FVWC%9*u-8!{rkV)V=MbL1{`zbAv1|2H~a{} z@EzA>MsQaz26uqVQE*kc95E)xa~mopWNK_dd`MJvM{q;#2m zQA63b(iMV#U#&`ih$)c=V34LE4w|u7oX^QPVVaC5yda2sU_RjelhBIc9q&mF*Z^(f zOQHxfouu4~>iAA3CL4@pIXp|VlIdvu>Tfc7lJXHSuu)2CnASAD-?( zMDkC1)!27W7!$aQE^V^mJ@^{NkrM*cM9C{Y~W%DtUd6Y5pm59cT!x(hxvEy?DPR z`+k=>dlN$oL)zc>zbo9a+DIrCD@q&MBQLC-(=+o{B=PF{n0d?!kwI!Kc1`WEoH8Q= zpcxApgqZzCo-8{jUcibwc9IViuH`xsim3VkcY<_ep0H$xDA`;>-4QNS^tB}~?=H>L z^~(NYvL(S)Gy$d)5U1TLd&!*=HlIK=73XEfM#khWBNiKmXb5Hb07LaGxs>i{i)b+b z**6dRAY`5qhbzv$MDeKyG9bFX-H6R+jU=p3tdlVPQy(^t-)h1q${hI;bV9(rmam?y zn+F3v;W69~2`{&TW|&Qyl!oOWibj&d-}}lXqlD zNS-DS#4mejb+n%*lu3kkz|{mTf4(s^Na?t%PS{-@beK-!LHen?L;j`w_^iuzvh40P-=p zD7d0yc?Bs|0@fY%uu}#eAa4+PBJs_E9$1-i$jbUqGWPZECE_VaB&z@<5}kTQaySX4 z`!XJ%*IdGoS~`a8ADTMBs#x;3&U)PgQnk;NKWA9+Xw$geUe6D%0^7K~uJ<3hv@08G zC?qW*h!qr0XCk%VUS7~vwcB3L4=-2`rb#b4yW5^0hT_}aE;w$=H4`R^P zJZ|=6B5@X%uyGR3@q;VQOE>h-NJ4de;GZslc~iUT1iAryi2IKDr$hZ9T3_)FI~~eT zI+w6jd%>Ey+RPRjaW5K(A`;6)D!`WY!yM6_IMxf3hR)Be)1+2nt{Q@yg2$uHPH7~H zf$sJgf2wTCFNBvH?!tv21?RGLOe(Zh2!`PlH3&iEq;dVD5)P27WI?tejo!%i68o6V6b|HFZoYqi-Z3bJf z4pEX)S|TCW5b~=>eX8%0yBvWxnEY+fuv_p?pEbK#0Ft)*lMSKhjM3bV^Wm~wc8gMg z{%%haczoe#CqHpC_s9iKA#Ooz*-HB5I;h&t7bAjtux-<%Skj`y=CH&TTr_*% z@=(J?uG;yUkzhtnj;n$ni%sYX_F=u~Zgo6%s^hwhLwgMqDg-(|aE>L4Df>s2Zc=oB z4FQE_v*fn%kd%gZ7qJ%Q##T1I&Y#GF5!bRGFojkmG}(4i6kSsz;J>UhgY4jLhfa~t zf$h@)n%cyV!Xed+<6S?Ux@%ffdRkTW>1eNVpR|6!a5u%kyREw&L69@^<!M_*HFN*=RA@A%#ZzWDYL2K8ZPjYB`pWddr4{my}^KavWA& z&~T+aB}D2*(V3Ctr+#*z10`zuc*&FWm2+hHl(ca}abr>e3>e_SnOq}WFkT{eU!D7G zwPgCpb6?R(Egqa~VG3PW$_Pu&0bNcTUI0bQ_`B9ZqUwab>V)iJe1^J+%?>o31uClZ z&h5Hz%T6HwB7cApcI=)$YF>3>b66?#XrhQ~3}Zneu&f>}BVMGv!a}2si>br6!WQ9d zYQix4s&KkVI$yL6D$bF(C$-+j#K36=nFhTEEL+CI@Q;XQ!UUxCJ|rXxhKc zbrgh5W)H~w=v{Np;0=V+U(qrZ>K!q}(=6|ubmK74Dg%XLIW%IV2f^v16Wn6EskN!S ze!wwJU}suTs(2&tsBb0lQQ{`sAlP&EJ2P|;ehXMZmrIkz_1RD;wSFz^S;HwNTEOmX z!3~dW0p-|ZMa))JrKN<32=)&3vppQ4_em~Cjox{1+SC~|nQIQ2WRHy?tqft9iEV(#6_1`Nb1NqqTxy?1ifvMSz*PU8MFicwriZ*Lvc85A9E1MxeZrzDL7q8 zkz@HHO>J9hM$-U~G~*)CyfN(&3fe(Ooj%O)MXg!7E2OGQZmF7)3KCXkn&sZYL4=L_ zqLh@kKXq8$fsg7)BK7`paJhNL2*SyS%hlS8El+-5R=_HrknC|1t3Q@SCv-VREgW*K znVppS&6C_5Vnt$}Iu=v64MEXQJ_E6~$lnoNdFZ8Fi?zphy zx3a!Dn$xWv>Jfj$&`>GuVYHf!$IVua1$Ub>M7-3en!%rH7LKO=Mm5}IA6lfd#*b$? 
z{IyEHp24}UluuxDowpFe!7t8O${w*o*Vxv5g)}8L^vqJrhxj4EnO}$X^793cQeS6> z+{2(1=+3mM_U&Kg7f*xoYVjQ16B7NvY2qUjK0D!&+NGc(~Db&;!$;$zhjO02j#%xF&`4iepzPm)H5oF^XUe#91 ztHlacp>9c?VXC2{9z4IOiwP@Z|C}wAkymU&-pNqEk-$8ZafObfP#e(D_R;vd%Px5^ zW^H^sOme`FB*r5Hz?zS^GdZi_Pykp_KDLKFS!3_Y??dOCn2}-1S?V!{7YIH)J{FS_fe+MS z?_Cp8GdZ5k*D@yewhb-HO1l=F@^(uUU&5ThpR(5*H{eWl*=*ZPb2^YcvshO}hEQ)R z@@8VWY{TeklKl0-VV*rulo_Xd!NKLLe8KyKvB6=pOrhu9R(Q1%X@y%v6Xl_v$lPxM zbPo7rZ745|85-mKdFne4a*nlTO;G{eQsjG1Bt~yTcWQcv7O8|4)Bu6Dz*-Iu?|gFm4!o{c*b z=7K&RF|Oovw1yp&zRBdek(blzbL2DiWx;nkQ$6(gc3uNC51hFNyuOvo*nzUWnXF(t z2%7biP7mav^wKlM_vhuFxG7`cUKyNYcAZLzraSQQtX1oqkfPC@9GE1Zfj7*^G2YJ3<*4-DCjU4Kwp6qxs}AFPZsN51UBWYOI~{irmw^ z1{#@v<(eiMJ=e8(@$Y%x^Ek-Xyo49 z-8@$$qfPA&#oLQE)LsI)v-Y+)s^aUeL&0tJZ~z@|ZnmqN&I-*s4+#2ozXl_)9jWWq zLvE&Q-x!_8&@b~Xq{m%YO3&}~{%731nHO7({a(vkMg#&v`490;bZrd{6zpxkSsMS& z|7z70%Ow_+R@`zYw(}p1-4`;mKgE#IQj?9qFthn4&3vLwLg|DVh<1j{YDDK_P6!Q2 zOAQum0yKp`jXAwKjP_}`KNm$+ug^l2&~SFe@CVjQIFL*qS6*$sN}hQHrWjM`%TszT zsM*dvu)W^I!|ZVZ7cCo--mRwGJ<*J~bSww6Xlxi1T6A!EE~>3z45{tfj(XrfWp%6< zYtvxMv|(#WYk}xdGNOG2gV(He za&+v~mN7BVbSrcmbn!HjzamnoG+0uW1bLvfG!aLYZX0S9fy~M=A_(nem1`|uV3iVk z>r>EO(J7XOP8}<=$*MG`Q!Orah#Z%@+_1f(BTJ+(q?smFGYarsV808Ntg*PhWg**C zvnKXkDucjCH&G#yqW;((`%ykyJb^rz5q9^LP=kS!b9gT?y%DtJk;pe{y>HvzS~ft- zJFCmP#O2}p#i&tAT0%+{#DKUB{SdDGH@n!hFs+YuVU&RbVw#HXFQ?@Fu4ynR@aOz-IF>p1;8{6dbq)3!bl>IHmSCg^l)#sFPwd6Wiw9E^R9Q8RR?vvPx z8ZA{;v^p>HNgG77R5bc%E?CWm=bv)kt1)OP4VCP^WhMP&`e`#IG6F>woYt+walt!` z$9Ek2G%31g$DCc2MPkdJaH47CSP@b`e$PQt{d!*t^s_$Z=b8&81&y7Qp2RRZIi0v8 zLKbri2gxphOGjE_2nGYv?(~%`G;mB8kR(>{@;0whZvp3w{9)cY&NMqU*tyRw7ac^4 z^L>GCOI$*{E-sdlT8H34yLq8K7~Digeg@(oYsRju8Zq7sj1gFWW)=-^Vk%dRC|VUt zzPvC~>09%Y2ro(c8)Pe;E@EM
>7sIt`60WV<1RJh=Y($X+L+f)owT=f=Q1IatX3-ql^}G$Z|9E=iw4nKBC(x4ybwrXJC>P7O_JLVA>2; z5yNvNvGFluYPiIHIWiP@Z8(l+=c;?1R&p9p-K?8Hzm7n3$snLFe%DI#rp|@ye_hQ^)>>(h3zeJdn8%wTmxi8&L$D1P)in zT+I?r>lA9deDf0D^s<=&=vFo$(cjMm$qu+;PqPs%hw-suku624w#UdlAUCB^#AxM< zJ(OKT5J31 z{%rVO`+ctb=WNPp!64c3{)@nSkVW{%Z2DVe=Re8%KT17+rtE(&^n8!(2|UsdfGUXM*cY8w^19Q!WL9;L&RDaPI} zd~)fn$A9E>n`Y(#{~_06ciHW(JKy}$FPb)b)E(3=-1BOxeh7nLP)1PZtaxMKh$i3m6e-oOWCQ5auD6S8@G zJYc;nP(jdmytE*c){%+eGd|l3Dp*)lI|)H)v=x^12ST2<#xCbmK;#}H(sz=Tg!akN z%Q^jrH@CvVTe+qYFhYAwRkZFgEuze14$iIUi3p6M$~X_{rB>@1V$M_3Dox(Di)3>$ z&C6#ozofR+j6~MG>s&i4v5(xwT#2j$>K3e`cT>Sdmy)#wsoI`OPxPYYG=30*+W^o& znU$@jkhmdww-Q6=k?bQeZs{o{i=#zg(1Wnxa(4!BQ~Py zj0_w5mI)t`VNf=6B0b#(59jK3p0K{W!AG*Mi0*1~HQE-5U2kU>Lr_M{dcrL^u~Ab- zO(pMy*_EvcTK0Mv*NSsDZ6uO0Iw=k0*x?ebsy?a_qfx=E*g0vJz-1k4D6@=KF3iPy zK6vpP3%uQ3LzR%XYJc|nk-C1GSa-eRoARMVymD;$aCY!1y}m!`sD!NGhe}m=DLiR_aH}jG1g*} zJ>tnyc#VxB`%%2kK(rxmYAo}Cl{5FOg17p`EkqN4=^{C44Y`LWZr4-uX_0Nltjfmp z*vg3eOr~@&w+6qg2JiP02~GYcDl;#&JiFg%O0_?(y5$`J+?%=Mnjh{EjSqI!usXcGK{{ zvP+&=SotTbSewZo&70n4EN|vk*Uh~~IN#QpMP^w(2UFNYpJv9XEC`H9S+tdNTyc$xwZg^W~-9;^D}62&Rh1e8bS<0Z`z+U97t1xY3o4v4-uQrL)<&;G|O` zc<39f4OS}_m_j>~jH2-gtCBKfQ_Y}p^Rg+AgbLu8-nOV;Kv0)f{MNu-=o_&3LSZi8R?BbLGP!W5oT^(r{CANT-LJWg}`%!f&)3|m#7~NU9X(r|T)BX=h9%bXA~&)iqfYR`a&mqD*KjMhP0OST8%UBTUoGS4Q` zYTEo~+&O!@)rwrwGJRX#!|hk8O^&CQ&KId$7X_uAFtl{1zF}>T<*lVs);vyPQc5b1 zpH3^{D+~D(m(}BQ+yz)~)ORinHokk2gbJ&kDh^@sTIq2+cQ^BjapUzbJ~;}rN;Fp0 z;cN*r7R#Pb8)@RN7i!u7!cPZh&np$Xb5l@-o<%h@SkOv_v~8o6`OTXhVRW`2##kRkV`{t)*F3 z!j-7PX26AvGKe%}LkBcA@-Y1}aF-ELCZYT};(~i(Ap~SZ1_;=q8n!6?>kS9p;Hyi9 z3W+rFO2$QMgiW4#2ZW9NSqXNy#gd4{V&D0L{&TCR>p@X~yi?PrcZihrARL`avfh9c zD%W6THAY%k6RPrXW6j#FvTr?a3OlXR*fAxn4ua1|;Xr889coP;bLD(7F+czjT+GrJ z!1w|+u@mmfmhoLvR2Lr(;d3C{8iK$;%IGB2#RoD5_Y3+E+<$ey>oXR$AMfsm|GpAL z_^bQrT3i3u`+lSBA6p85-)e~t*8dFj78iB}i(OL=)~8rcjP-gv6Byvw=Nq%Hnok9V zq~HB?6Jx>TFq`T~_z;vQBZI`+H~R^v*Pyv*-d3&>B=vH!-lHeYk(Z{mk^qqnZkDrH zH8Oame=NP5!aif+v3IeVJ84CN?m0_7sWJ6{JOvNB~_b2p#lGEm)VLZs@1 z9fAT})u4H-dX#;799ZI*=fC^5eeA~RL|6oul9IQtK9r4)6lQ3%GmkI62ZMr032{Jx zV;fZMTAU86feX%jKG-rdMIeVtL(APz9d+8PX#fn_;G^w@`gME&_}g`-;)fnotny`h zA<*u16LxiTZrp?VgSMF9IZuedH3%H@o<|^V$GmZvoh$*Bj&&9^vYs+?`=_>nb=EnL z1?nyF38iXpqr82?JGHHxfG0TuU;ccvG%Bil4?8v$89#I+{E#zz#+3~9`dYX1@2*@A zc3s@~?#knTJR9(rnbg6~-pb;?-uyql3qz!0PX5Y}rqdnmNRma_U;4WolWaVoE`duVrG<&iXDVZ?Eli#L6f>)UdQ^aePu629%o#`Zv zvJi0(Dfhcv|7ENIKdg_2ys!VTsaDZTFZ2Q?&MNOi<_T}4r|SKN$>6;!&DNUvo49zM zyk)pHjFlxB6afkWG}IctBAznc^S4i?EI%F>?){o-9G?ht{L;Xf5}_Y?g!X!uRuDj} zb~B_%1AkuZ9?VLIlV^Mj$e5hu>$L_ao`RL2hKhm8W&2U3n)*d^lm9_h&#NTbHb!p* zbsMS#ADO{@Qix8~IJnRzwID3Fsuh0nL1z) z>i4YNzdsQ3=N5@BP;oLfWNor{}rI>{gi)c*8kP;*CypZO?Tg0(|&1L{?+*J zO^<(?0s&1y|8D&MX@UF|=hsfSKas%SOBVmXzPMjeer+`P6XoQ+@8BKf*S3RS0e&r& z{0U(G-t_qn@Mq!VSJPjUt$&&tzE5+%nf{u3{T1QY1jC;QRYbpz!M~;(eg*t>z5XX) z4e{@QzpU2(b@l!$=-=bqKOupDCMke`{t@;5YX0{)<*(*))PFJmPt@|O_1~r8uh!Uf ef3ep8U!jnd1bcVl-|Aj)K*sMp_{jL%r~e1fP^zH- literal 0 HcmV?d00001 From af8f40fcbf71cb2413b7a54572312f0fd32dcfaf Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Tue, 28 Mar 2023 11:26:13 +1100 Subject: [PATCH 03/26] added code to copy over data dictionary in CSV and XLSX format to study region output folder when generating resources, as per #230 --- process/3_generate_resources.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/process/3_generate_resources.py b/process/3_generate_resources.py index 187c1219..4176a70a 100644 --- a/process/3_generate_resources.py +++ b/process/3_generate_resources.py @@ -5,6 +5,7 @@ """ import os +import shutil import sys # import and set up functions @@ -55,6 
+56,17 @@ def main(): _report_functions.generate_report_for_language( config, language, indicators, policies, ) + # copy information assets to region output directory + required_assets = [ + 'output_data_dictionary.csv', + 'output_data_dictionary.xlsx', + ] + for file in required_assets: + shutil.copyfile( + f'./configuration/assets/{file}', + f"{config.region['region_dir']}/{file}", + ) + print(f"\t- copied {file} to {config.region['region_dir']}") if __name__ == '__main__': From 47dba5ed6d2bfb6c1d4aa2c0cf19e7195121080e Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 29 Mar 2023 11:24:46 +1100 Subject: [PATCH 04/26] improved output from the 3_generate_resources.py script (provide output directory and list all generate resources including data, data dictionaries, figure and maps, reports generated along with an advisory note to inspect and confirm appropriateness of outputs prior to dissemination); also amended Docker image to add pygeometa to support generation of metadata (not yet implemented) and remove the platform specific Docker build variable (more experimentation required; this may have caused an issue for an ARM64 platform based user) --- docker/Dockerfile | 2 +- docker/environment.yml | 1 + docker/requirements.txt | 14 ++++++++-- process/3_generate_resources.py | 32 ++++++++++++++-------- process/configuration/templates/config.yml | 2 +- process/subprocesses/_project_setup.py | 12 +++----- process/subprocesses/_report_functions.py | 12 ++++++-- 7 files changed, 49 insertions(+), 26 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 713d361a..088d2669 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,7 +17,7 @@ # >>> docker rmi $(docker images -q) --force ############################################################################## -FROM --platform=$BUILDPLATFORM continuumio/miniconda3:4.10.3-alpine as build +FROM continuumio/miniconda3:4.10.3-alpine as build LABEL maintainer="Global Healthy Liveable City Indicator Study Collaboration Group" LABEL url="https://github.com/global-healthy-liveable-cities/global-indicators" diff --git a/docker/environment.yml b/docker/environment.yml index 8cecea17..8a6ca14f 100644 --- a/docker/environment.yml +++ b/docker/environment.yml @@ -18,3 +18,4 @@ dependencies: - rasterio - urbanaccess - tqdm + - pygeometa diff --git a/docker/requirements.txt b/docker/requirements.txt index 3aa09609..b80abaaf 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -15,6 +15,7 @@ contextily==1.3.0 contourpy==1.0.7 cryptography==39.0.2 cycler==0.11.0 +dataclasses==0.8 defusedxml==0.7.1 et-xmlfile==1.1.0 Fiona==1.9.2 @@ -29,9 +30,12 @@ geopy==2.3.0 greenlet==2.0.2 idna==3.4 importlib-metadata==6.1.0 +importlib-resources==5.12.0 Jinja2==3.1.2 joblib==1.2.0 +jsonschema==4.17.3 kiwisolver==1.4.4 +lxml==4.9.2 mapclassify==2.5.0 MarkupSafe==2.1.2 matplotlib==3.7.1 @@ -44,22 +48,26 @@ numpy==1.24.2 openpyxl==3.1.1 osmnet==0.1.6 osmnx==1.3.0 +OWSLib==0.28.1 packaging==23.0 pandana==0.6.1 pandas==1.5.3 Pillow==9.4.0 pip==23.0.1 -platformdirs==3.1.1 +pkgutil_resolve_name==1.3.10 +platformdirs==3.2.0 pooch==1.7.0 psycopg2==2.9.3 pycparser==2.21 -pyOpenSSL==23.0.0 +pygeometa==0.13.1 +pyOpenSSL==23.1.0 pyparsing==3.0.9 pyproj==3.4.1 +pyrsistent==0.19.3 pyshp==2.3.1 PySocks==1.7.1 python-dateutil==2.8.2 -pytz==2022.7.1 +pytz==2023.2 PyYAML==6.0 rasterio==1.3.6 requests==2.28.2 diff --git a/process/3_generate_resources.py b/process/3_generate_resources.py index 4176a70a..d56331c2 100644 --- a/process/3_generate_resources.py 
+++ b/process/3_generate_resources.py @@ -47,16 +47,12 @@ class config: def main(): - languages = _report_functions.get_and_setup_language_cities(config) - if languages == []: - sys.exit( - '\nReport generation failed (no language configured for this city). Please confirm that city and its corresponding codename have been configured in the city details and language worksheets of configuration/_report_configuration.xlsx.\n\n', - ) - for language in languages: - _report_functions.generate_report_for_language( - config, language, indicators, policies, - ) - # copy information assets to region output directory + # Generate data dictionary + print('Data files') + print(f" - {os.path.basename(region_config['gpkg'])}") + print(f" - {region_config['grid_summary']}.csv") + print(f" - {region_config['city_summary']}.csv") + print('\nData dictionaries') required_assets = [ 'output_data_dictionary.csv', 'output_data_dictionary.xlsx', @@ -66,7 +62,21 @@ def main(): f'./configuration/assets/{file}', f"{config.region['region_dir']}/{file}", ) - print(f"\t- copied {file} to {config.region['region_dir']}") + print(f' {file}') + # Generate reports + languages = _report_functions.get_and_setup_language_cities(config) + if languages == []: + print( + ' - Report generation skippped. Please confirm that city and its corresponding codename have been configured in the city details and language worksheets of configuration/_report_configuration.xlsx.', + ) + else: + for language in languages: + _report_functions.generate_report_for_language( + config, language, indicators, policies, + ) + print( + '\n\nIt is important to take the time to familiarise yourself with the various outputs generated from the configuration and analysis of your region of interest to ensure they provide a fair and accurate representation given local knowledge. Limitations or issues identified should be understood and addressed or otherwise documented prior to dissemination.\n\n', + ) if __name__ == '__main__': diff --git a/process/configuration/templates/config.yml b/process/configuration/templates/config.yml index ec1ac4e3..dddd3ccd 100644 --- a/process/configuration/templates/config.yml +++ b/process/configuration/templates/config.yml @@ -12,7 +12,7 @@ project: # Specifically, this can be useful if you notice that the Docker process has been 'Killed' when running the neighbourhood analysis script. multiprocessing: 6 # Number of processors to use in multiprocessing scripts, if implemented - default_codename: example_las_palmas_2023 + default_codename: example_ES_Las_Palmas_2023 # an optional default study region as defined in regions.yml, useful for debugging analysis_timezone: Australia/Melbourne # to have localised time estimates for when processes have commenced and finished, modify this (see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). 
diff --git a/process/subprocesses/_project_setup.py b/process/subprocesses/_project_setup.py index ae080239..088e4006 100644 --- a/process/subprocesses/_project_setup.py +++ b/process/subprocesses/_project_setup.py @@ -394,11 +394,7 @@ def main(): main() else: print(f'\n{authors}, version {version}') - if any( - ['_generate_reports.py' in f.filename for f in inspect.stack()[1:]], - ): - print('\nGenerate reports\n') - print(f'\nOutput directory: {region_dir.replace(folder_path,"")}\n\n') - else: - print(f'\nProcessing: {name} ({codename}{is_default_codename})\n\n') - print(f'\nOutput directory: {region_dir}\n\n') + print(f'\nProcessing: {name} ({codename}{is_default_codename})\n') + print( + f"\nOutput directory: {region_dir.replace('/home/ghsci/work/','')}\n", + ) diff --git a/process/subprocesses/_report_functions.py b/process/subprocesses/_report_functions.py index eb65be29..2c490df9 100644 --- a/process/subprocesses/_report_functions.py +++ b/process/subprocesses/_report_functions.py @@ -98,6 +98,7 @@ def generate_report_for_language( phrases = prepare_phrases(config, city, language) # Generate resources if config.generate_resources: + print(f'\nFigures and maps ({language})') capture_return = generate_resources( config, gdfs['city'], @@ -110,7 +111,7 @@ def generate_report_for_language( ) # instantiate template for template in config.templates: - print(f' [{template}]') + print(f'\nReport ({template} PDF template; {language})') capture_return = generate_scorecard( config, phrases, indicators, city_policy, language, template, font, ) @@ -232,6 +233,7 @@ def generate_resources( phrases=phrases, path=f'{figure_path}/access_profile_{language}.jpg', ) + print(f' figures/access_profile_{language}.jpg') ## constrain extreme outlying walkability for representation gdf_grid['all_cities_walkability'] = gdf_grid[ 'all_cities_walkability' @@ -256,6 +258,7 @@ def generate_resources( phrases=phrases, locale=locale, ) + print(f" figures/{spatial_maps[f]['outfile']}") # Threshold maps for scenario in indicators['report']['thresholds']: threshold_map( @@ -273,6 +276,9 @@ def generate_resources( phrases=phrases, locale=locale, ) + print( + f" figures/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg", + ) # Policy ratings policy_rating( range=[0, 24], @@ -284,6 +290,7 @@ def generate_resources( locale=locale, path=f'{figure_path}/policy_presence_rating_{language}.jpg', ) + print(f' figures/policy_presence_rating_{language}.jpg') policy_rating( range=[0, 57], score=city_policy['Checklist_rating'], @@ -294,6 +301,7 @@ def generate_resources( locale=locale, path=f'{figure_path}/policy_checklist_rating_{language}.jpg', ) + print(f' figures/policy_checklist_rating_{language}.jpg') return figure_path @@ -990,7 +998,7 @@ def save_pdf_layout(pdf, folder, template, filename): if not os.path.exists(template_folder): os.mkdir(template_folder) pdf.output(f'{template_folder}/{filename}') - return f'Scorecard generated ({template_folder}):\n{filename}\n' + return f' _{template} reports/{filename}'.replace('/home/ghsci/work/', '') def generate_scorecard( From 2b3479d904fa64e67aa8f0cce77e7b11752e68ef Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 29 Mar 2023 12:01:37 +1100 Subject: [PATCH 05/26] amended spacing of some messages for cleaner output and changed the overall study region results summary name from 'city' to 'region' (as someone may use it just for a specific area, that could be a sub-part of a city, or not a city at all) --- process/2_analyse_region.py | 10 ++++++---- 
process/3_generate_resources.py | 15 ++++++++++----- process/subprocesses/_project_setup.py | 4 ++-- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/process/2_analyse_region.py b/process/2_analyse_region.py index 74f5dda3..977652d9 100644 --- a/process/2_analyse_region.py +++ b/process/2_analyse_region.py @@ -56,7 +56,7 @@ and current_parameters[codename] == saved_parameters[codename] ): print( - f"The saved copy of region and project parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} at {region_dir}/_parameters_{saved_parameters['date']}.yml matches the current configuration parameters and will be retained.\n\n", + f"The copy of region and project parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} saved in the output directory as _parameters_{saved_parameters['date']}.yml matches the current configuration parameters and will be retained.\n\n", ) else: shutil.copyfile( @@ -73,7 +73,7 @@ width=float('inf'), ) print( - f"Project or region parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} appear to have been modified. The previous parameter record file has been copied to {region_dir}/_parameters_{saved_parameters['date']}.yml, while the current ones have been saved as {region_dir}/_parameters.yml.\n\n", + f"Project or region parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} appear to have been modified. The previous parameter record file has been copied to the output directory as _parameters_{saved_parameters['date']}.yml, while the current ones have been saved as _parameters.yml.\n\n", ) else: with open(f'{region_dir}/_parameters.yml', 'w') as f: @@ -86,7 +86,9 @@ width=float('inf'), ) print( - f'A dated copy of project and region parameters has been saved as {region_dir}/_parameters.yml.\n\n', + f'A dated copy of project and region parameters has been saved as {region_dir}/_parameters.yml.\n\n'.replace( + '/home/ghsci/work/', '', + ), ) print( @@ -136,5 +138,5 @@ finally: duration = (time.time() - start_analysis) / 60 print( - f'{time.strftime("%Y-%m-%d_%H%M")} (analysis duration of approximately {duration:.1f} minutes)\n\nOnce the setup of study region resources has been successfully completed, we encourage you to inspect the region-specific resources located in the output directory (e.g. 
text log file, geopackage output, csv files, PDF report and image files).', + f'{time.strftime("%Y-%m-%d_%H%M")} (analysis duration of approximately {duration:.1f} minutes)\n\nOnce the setup of study region resources has been successfully completed.\n', ) diff --git a/process/3_generate_resources.py b/process/3_generate_resources.py index d56331c2..07f5eb0b 100644 --- a/process/3_generate_resources.py +++ b/process/3_generate_resources.py @@ -47,11 +47,16 @@ class config: def main(): + # List existing generated resources + print('\nAnalysis parameter summary text file') + print(f' {codename}.yml') + print('\nAnalysis log text file') + print(f" __{region_config['name']}__{codename}_processing_log.txt") + print('\nData files') + print(f" {os.path.basename(region_config['gpkg'])}") + print(f" {region_config['grid_summary']}.csv") + print(f" {region_config['city_summary']}.csv") # Generate data dictionary - print('Data files') - print(f" - {os.path.basename(region_config['gpkg'])}") - print(f" - {region_config['grid_summary']}.csv") - print(f" - {region_config['city_summary']}.csv") print('\nData dictionaries') required_assets = [ 'output_data_dictionary.csv', @@ -75,7 +80,7 @@ def main(): config, language, indicators, policies, ) print( - '\n\nIt is important to take the time to familiarise yourself with the various outputs generated from the configuration and analysis of your region of interest to ensure they provide a fair and accurate representation given local knowledge. Limitations or issues identified should be understood and addressed or otherwise documented prior to dissemination.\n\n', + '\n\nIt is important to take the time to familiarise yourself with the various outputs generated from the configuration and analysis of your region of interest to ensure they provide a fair and accurate representation given local knowledge. 
Any issues or limitations identified should be understood and can be iteratively addressed and/or acknowledged in documentation prior to dissemination.\n\n', ) diff --git a/process/subprocesses/_project_setup.py b/process/subprocesses/_project_setup.py index 088e4006..ef5a459d 100644 --- a/process/subprocesses/_project_setup.py +++ b/process/subprocesses/_project_setup.py @@ -184,7 +184,7 @@ def region_dictionary_setup(region, region_config, config, folder_path): 'gpkg' ] = f'{r["region_dir"]}/{r["study_region"]}_{study_buffer}m_buffer.gpkg' r['grid_summary'] = f'{r["study_region"]}_grid_{resolution}_{date}' - r['city_summary'] = f'{r["study_region"]}_city_{date}' + r['city_summary'] = f'{r["study_region"]}_region_{date}' if 'policy_review' in r: r['policy_review'] = f"{folder_path}/{r['policy_review']}" else: @@ -394,7 +394,7 @@ def main(): main() else: print(f'\n{authors}, version {version}') - print(f'\nProcessing: {name} ({codename}{is_default_codename})\n') + print(f'\nProcessing: {name} ({codename}{is_default_codename})') print( f"\nOutput directory: {region_dir.replace('/home/ghsci/work/','')}\n", ) From 98a9a196be753141b56330fdc7c3f897dcc21bdf Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 29 Mar 2023 12:32:01 +1100 Subject: [PATCH 06/26] implemented text wrapping for better printing to screen --- .../1_create_project_configuration_files.py | 13 +++++++----- process/2_analyse_region.py | 19 +++++++++--------- process/3_generate_resources.py | 5 +++-- process/subprocesses/_project_setup.py | 8 ++++---- process/subprocesses/_utils.py | 20 +++++++++++++++++-- 5 files changed, 43 insertions(+), 22 deletions(-) diff --git a/process/1_create_project_configuration_files.py b/process/1_create_project_configuration_files.py index c305c2c3..3b2fa5ee 100644 --- a/process/1_create_project_configuration_files.py +++ b/process/1_create_project_configuration_files.py @@ -8,6 +8,8 @@ import shutil import sys +from subprocesses._utils import get_terminal_columns, print_autobreak + source_folder = './configuration/templates' dest_folder = './configuration' region_names = [ @@ -55,7 +57,7 @@ notes: Notes for this region """ -print( +print_autobreak( 'Creating project configuration files, if not already existing in the configuration folder...', ) try: @@ -73,17 +75,18 @@ if len(sys.argv) >= 2: codename = sys.argv[1] if os.path.exists(f'./configuration/regions/{codename}.yml'): - print( + print_autobreak( f"\nConfiguration file for the specified study region codename '{codename}' already exists: \nconfiguration/regions/{codename}.yml.\n\nPlease open and edit this file in a text editor following the provided example directions in order to complete configuration for your study region. A completed example study region configuration can be viewed in the file 'configuration/regions/example_ES_Las_Palmas_2023.yml'.\n\nTo view additional guidance on configuration, run this script again without a codename. Once configuration has been completed, to proceed to analysis for this city, enter:\npython 2_analyse_region.py {codename}\n", ) else: with open(f'./configuration/regions/{codename}.yml', 'w') as f: f.write(region_template) - print( + print_autobreak( f"\nNew region configuration file has been initialised using the codename, '{codename}', in the folder:\nconfiguration/regions/{codename}.yml\n\nPlease open and edit this file in a text editor following the provided example directions in order to complete configuration for your study region. 
A completed example study region configuration can be viewed in the file 'configuration/regions/example_ES_Las_Palmas_2023.yml'.\n\nTo view additional guidance on configuration, run this script again without a codename.\n", ) else: - print( + list_seperation = '\n ' + print_autobreak( f""" Before commencing analysis, your study regions will need to be configured. @@ -102,7 +105,7 @@ Optional configuration of other parameters is also possible. Please visit our tool's website for further guidance on how to use this tool: https://global-healthy-liveable-cities.github.io/ -Currently configured study regions : {', '.join(region_names)} +Currently configured study regions : {list_seperation}{list_seperation.join(region_names)} To initialise a new study region configuration file, you can run this script again providing your choice of codename, for example: diff --git a/process/2_analyse_region.py b/process/2_analyse_region.py index 977652d9..26d4d6e7 100644 --- a/process/2_analyse_region.py +++ b/process/2_analyse_region.py @@ -25,6 +25,7 @@ time, version, ) +from subprocesses._utils import get_terminal_columns, print_autobreak from tqdm.auto import tqdm # Create study region folder if not exists @@ -55,7 +56,7 @@ current_parameters['project'] == saved_parameters['project'] and current_parameters[codename] == saved_parameters[codename] ): - print( + print_autobreak( f"The copy of region and project parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} saved in the output directory as _parameters_{saved_parameters['date']}.yml matches the current configuration parameters and will be retained.\n\n", ) else: @@ -72,8 +73,8 @@ sort_keys=False, width=float('inf'), ) - print( - f"Project or region parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} appear to have been modified. The previous parameter record file has been copied to the output directory as _parameters_{saved_parameters['date']}.yml, while the current ones have been saved as _parameters.yml.\n\n", + print_autobreak( + f"Project or region parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} appear to have been modified. The previous parameter record file has been copied to the output directory as _parameters_{saved_parameters['date']}.yml, while the current ones have been saved as _parameters.yml.\n", ) else: with open(f'{region_dir}/_parameters.yml', 'w') as f: @@ -85,13 +86,13 @@ sort_keys=False, width=float('inf'), ) - print( - f'A dated copy of project and region parameters has been saved as {region_dir}/_parameters.yml.\n\n'.replace( + print_autobreak( + f'A dated copy of project and region parameters has been saved as {region_dir}/_parameters.yml.\n'.replace( '/home/ghsci/work/', '', ), ) -print( +print_autobreak( f'Analysis time zone: {analysis_timezone} (to set time zone for where you are, edit config.yml)', ) start_analysis = time.time() @@ -132,11 +133,11 @@ stdout=append_to_log_file, ) except Exception as e: - print( + print_autobreak( f'\n\nProcessing {step} failed: {e}\n\n Please review the processing log file for this study region for more information on what caused this error and how to resolve it. 
The file __{name}__{codename}_processing_log.txt is located in the output directory and may be opened for viewing in a text editor.', ) finally: duration = (time.time() - start_analysis) / 60 - print( - f'{time.strftime("%Y-%m-%d_%H%M")} (analysis duration of approximately {duration:.1f} minutes)\n\nOnce the setup of study region resources has been successfully completed.\n', + print_autobreak( + f'{time.strftime("%Y-%m-%d_%H%M")} (analysis duration of approximately {duration:.1f} minutes)\n', ) diff --git a/process/3_generate_resources.py b/process/3_generate_resources.py index 07f5eb0b..3f3d5fb8 100644 --- a/process/3_generate_resources.py +++ b/process/3_generate_resources.py @@ -18,6 +18,7 @@ region_config, region_names, ) +from subprocesses._utils import get_terminal_columns, print_autobreak class config: @@ -71,7 +72,7 @@ def main(): # Generate reports languages = _report_functions.get_and_setup_language_cities(config) if languages == []: - print( + print_autobreak( ' - Report generation skippped. Please confirm that city and its corresponding codename have been configured in the city details and language worksheets of configuration/_report_configuration.xlsx.', ) else: @@ -79,7 +80,7 @@ def main(): _report_functions.generate_report_for_language( config, language, indicators, policies, ) - print( + print_autobreak( '\n\nIt is important to take the time to familiarise yourself with the various outputs generated from the configuration and analysis of your region of interest to ensure they provide a fair and accurate representation given local knowledge. Any issues or limitations identified should be understood and can be iteratively addressed and/or acknowledged in documentation prior to dissemination.\n\n', ) diff --git a/process/subprocesses/_project_setup.py b/process/subprocesses/_project_setup.py index ef5a459d..ac43fa82 100644 --- a/process/subprocesses/_project_setup.py +++ b/process/subprocesses/_project_setup.py @@ -183,8 +183,8 @@ def region_dictionary_setup(region, region_config, config, folder_path): r[ 'gpkg' ] = f'{r["region_dir"]}/{r["study_region"]}_{study_buffer}m_buffer.gpkg' - r['grid_summary'] = f'{r["study_region"]}_grid_{resolution}_{date}' - r['city_summary'] = f'{r["study_region"]}_region_{date}' + r['grid_summary'] = f'{r["study_region"]}_grid_{resolution}' + r['city_summary'] = f'{r["study_region"]}_region' if 'policy_review' in r: r['policy_review'] = f"{folder_path}/{r['policy_review']}" else: @@ -394,7 +394,7 @@ def main(): main() else: print(f'\n{authors}, version {version}') - print(f'\nProcessing: {name} ({codename}{is_default_codename})') + print(f'\n{name} ({codename}{is_default_codename})') print( - f"\nOutput directory: {region_dir.replace('/home/ghsci/work/','')}\n", + f"\nOutput directory:\n {region_dir.replace('/home/ghsci/work/','')}\n", ) diff --git a/process/subprocesses/_utils.py b/process/subprocesses/_utils.py index bf3bc10a..bcd8e610 100644 --- a/process/subprocesses/_utils.py +++ b/process/subprocesses/_utils.py @@ -2,9 +2,25 @@ Utility functions. Purpose: These functions may be used in other scripts to undertake specific tasks. 
-Context: Liveability indicator calculation (general tools for data wrangling) - """ +import shutil +import textwrap + + +# 'pretty' text wrapping as per https://stackoverflow.com/questions/37572837/how-can-i-make-python-3s-print-fit-the-size-of-the-command-prompt +def get_terminal_columns(): + return shutil.get_terminal_size().columns + + +def print_autobreak(*args, sep=' '): + width = ( + get_terminal_columns() + ) # Check size once to avoid rechecks per "paragraph" + # Convert all args to strings, join with separator, then split on any newlines, + # preserving line endings, so each "paragraph" wrapped separately + for line in sep.join(map(str, args)).splitlines(True): + # Py3's print function makes it easy to print textwrap.wrap's result as one-liner + print(*textwrap.wrap(line, width), sep='\n') def reproject_raster(inpath, outpath, new_crs): From 2616f3d51ef441f0ca0e719aed26a61a357305d2 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 29 Mar 2023 12:54:52 +1100 Subject: [PATCH 07/26] more minor output printing tidying up --- process/2_analyse_region.py | 6 +++--- process/3_generate_resources.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/process/2_analyse_region.py b/process/2_analyse_region.py index 26d4d6e7..56192bf2 100644 --- a/process/2_analyse_region.py +++ b/process/2_analyse_region.py @@ -96,7 +96,7 @@ f'Analysis time zone: {analysis_timezone} (to set time zone for where you are, edit config.yml)', ) start_analysis = time.time() -print(f"Analysis start: {time.strftime('%Y-%m-%d_%H%M')}") +print(f"Analysis start:\t{time.strftime('%Y-%m-%d_%H%M')}") study_region_setup = { '_00_create_database.py': 'Create database', '_01_create_study_region.py': 'Create study region', @@ -138,6 +138,6 @@ ) finally: duration = (time.time() - start_analysis) / 60 - print_autobreak( - f'{time.strftime("%Y-%m-%d_%H%M")} (analysis duration of approximately {duration:.1f} minutes)\n', + print( + f'Analysis end:\t{time.strftime("%Y-%m-%d_%H%M")} (approximately {duration:.1f} minutes)\n', ) diff --git a/process/3_generate_resources.py b/process/3_generate_resources.py index 3f3d5fb8..99efd1ab 100644 --- a/process/3_generate_resources.py +++ b/process/3_generate_resources.py @@ -49,7 +49,7 @@ class config: def main(): # List existing generated resources - print('\nAnalysis parameter summary text file') + print('Analysis parameter summary text file') print(f' {codename}.yml') print('\nAnalysis log text file') print(f" __{region_config['name']}__{codename}_processing_log.txt") From f57c0b46965dc6deb3c0e59b7c209bdfffb00616 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 29 Mar 2023 15:23:21 +1100 Subject: [PATCH 08/26] added in some additional error checking for mis-specified configurations --- process/subprocesses/_project_setup.py | 12 ++++++++---- process/subprocesses/_utils.py | 11 +++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/process/subprocesses/_project_setup.py b/process/subprocesses/_project_setup.py index ac43fa82..241fb83e 100644 --- a/process/subprocesses/_project_setup.py +++ b/process/subprocesses/_project_setup.py @@ -86,17 +86,21 @@ def region_data_setup( try: if data not in datasets or datasets[data] is None: raise SystemExit( - f'An entry for at least one {data} dataset does not appear to have been defined in datasets.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml with region configuration in {region}.yml. 
Please update datasets.yml to proceed.', + f'\nAn entry for at least one {data} dataset does not appear to have been defined in datasets.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml with region configuration in {region}.yml. Please update datasets.yml to proceed.\n', ) elif region_config[data] is None: raise SystemExit( - f'The entry for {data} does not appear to have been defined in {region}.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml. Please update {region}.yml to proceed.', + f'\nThe entry for {data} does not appear to have been defined in {region}.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml. Please update {region}.yml to proceed.\n', + ) + elif datasets[data][region_config[data]] is None: + raise SystemExit( + f'\nThe configured entry for {region_config[data]} under {data} within datasets.yml does not appear to be associated within any values. Please check and amend the specification for this entry within datasets.yml , or the configuration within {region}.yml to proceed. (is this entry and its records indented as per the provided example?)\n', ) else: if 'citation' not in datasets[data][region_config[data]]: if data != 'OpenStreetMap': raise SystemExit( - f'No citation record has been configured for the {data} dataset configured for this region. Please add this to its record in datasets.yml (see template datasets.yml for examples).', + f'\nNo citation record has been configured for the {data} dataset configured for this region. Please add this to its record in datasets.yml (see template datasets.yml for examples).\n', ) elif 'source' not in region_config['OpenStreetMap']: datasets[data][region_config[data]][ @@ -118,7 +122,7 @@ def region_data_setup( 'data_dir' ] = f"{data_path}/{datasets[data][region_config[data]]['data_dir']}" return data_dictionary - except Exception: + except Exception as e: raise e diff --git a/process/subprocesses/_utils.py b/process/subprocesses/_utils.py index bcd8e610..e33eb278 100644 --- a/process/subprocesses/_utils.py +++ b/process/subprocesses/_utils.py @@ -23,6 +23,17 @@ def print_autobreak(*args, sep=' '): print(*textwrap.wrap(line, width), sep='\n') +def wrap_autobreak(*args, sep=' '): + width = ( + get_terminal_columns() + ) # Check size once to avoid rechecks per "paragraph" + # Convert all args to strings, join with separator, then split on any newlines, + # preserving line endings, so each "paragraph" wrapped separately + for line in sep.join(map(str, args)).splitlines(True): + # Py3's print function makes it easy to print textwrap.wrap's result as one-liner + return '\n'.join(textwrap.wrap(line, width)) + + def reproject_raster(inpath, outpath, new_crs): import rasterio from rasterio.warp import ( From 26439f79d07a81a299ac8621742ac00a50ab1c30 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 29 Mar 2023 17:06:40 +1100 Subject: [PATCH 09/26] updated the neighbourhood analysis script to read from SQL, not gpkg (the latter has slow read/write with non-concurrent access that can cause problems; also results in redundant read/writes as data is already written to be read from the database. 
This commit address #216 --- .../_04_create_population_grid.py | 1 + .../_07_locate_origins_destinations.py | 24 +-- .../_12_neighbourhood_analysis.py | 150 +++++++++--------- process/subprocesses/_project_setup.py | 2 +- process/subprocesses/setup_sp.py | 7 +- 5 files changed, 92 insertions(+), 92 deletions(-) diff --git a/process/subprocesses/_04_create_population_grid.py b/process/subprocesses/_04_create_population_grid.py index 498e391e..72be20ca 100644 --- a/process/subprocesses/_04_create_population_grid.py +++ b/process/subprocesses/_04_create_population_grid.py @@ -13,6 +13,7 @@ # Set up project and region parameters for GHSCIC analyses from _project_setup import * from _utils import reproject_raster +from geoalchemy2 import Geometry from osgeo import gdal from script_running_log import script_running_log from sqlalchemy import create_engine, inspect, text diff --git a/process/subprocesses/_07_locate_origins_destinations.py b/process/subprocesses/_07_locate_origins_destinations.py index f2ebc50c..b4134e85 100644 --- a/process/subprocesses/_07_locate_origins_destinations.py +++ b/process/subprocesses/_07_locate_origins_destinations.py @@ -66,25 +66,29 @@ def main(): DROP TABLE IF EXISTS sampling_locate_line; CREATE TABLE sampling_locate_line AS SELECT s.point_id, - s.ogc_fid edge_ogc_fid, - s.metres, - "from" n1, - "to" n2, - e.geom AS edge_geom, - ST_LineLocatePoint(e.geom, n1.geom) llp1, - ST_LineLocatePoint(e.geom, s.geom) llpm, - ST_LineLocatePoint(e.geom, n2.geom) llp2, - s.geom + s.ogc_fid edge_ogc_fid, + o.grid_id, + s.metres, + "from" n1, + "to" n2, + e.geom AS edge_geom, + ST_LineLocatePoint(e.geom, n1.geom) llp1, + ST_LineLocatePoint(e.geom, s.geom) llpm, + ST_LineLocatePoint(e.geom, n2.geom) llp2, + s.geom FROM {points} s LEFT JOIN edges e ON s.ogc_fid = e.ogc_fid LEFT JOIN nodes n1 ON e."from" = n1.osmid - LEFT JOIN nodes n2 ON e."to" = n2.osmid; + LEFT JOIN nodes n2 ON e."to" = n2.osmid + LEFT JOIN {population_grid} o + ON ST_Intersects(o.geom,s.geom); -- part 2 (split to save memory on parallel worker query) DROP TABLE IF EXISTS sampling_temp; CREATE TABLE sampling_temp AS SELECT point_id, edge_ogc_fid, + grid_id, metres, n1, n2, diff --git a/process/subprocesses/_12_neighbourhood_analysis.py b/process/subprocesses/_12_neighbourhood_analysis.py index 6165962b..24e48573 100644 --- a/process/subprocesses/_12_neighbourhood_analysis.py +++ b/process/subprocesses/_12_neighbourhood_analysis.py @@ -10,7 +10,7 @@ city summaries by running 03_aggregation.py. As a result of running the script, a layer is added to the study region's geopackage file -containing sample point indicators ("samplePointsData"). These indicators include: +containing sample point indicators ("sample_points"). These indicators include: 1. average population and intersection density per sample sample point 2. 
accessibility, dailyliving and walkability score per sample point """ @@ -19,7 +19,6 @@ import sys import time -import fiona import geopandas as gpd import networkx as nx import numpy as np @@ -28,6 +27,7 @@ # Set up project and region parameters for GHSCIC analyses from _project_setup import * +from geoalchemy2 import Geometry from setup_sp import ( binary_access_score, cal_dist_node_to_nearest_pois, @@ -36,6 +36,7 @@ filter_ids, spatial_join_index_to_gdf, ) +from sqlalchemy import create_engine, inspect, text from tqdm import tqdm # Hard coded density variable names @@ -45,30 +46,22 @@ def main(): startTime = time.time() - - for file in [gpkg, graphml_proj]: - if not os.path.exists(file): - sys.exit( - f"\n\nSpatial features required for analysis of this city ({file}) weren't able to be located; please confirm that the study region setup scripts have been successfully completed and that this file exists for this study region in the specified path.\n\n", - ) - - # Check if geopackage has a -wal file associated with it - # if so it is likely open and locked for use by another software package (e.g. QGIS) - # and will be unable to be used - if os.path.exists(f'{gpkg}-wal'): - sys.exit( - f'\nIt appears that the required geopackage {gpkg} may be open in another software package, ' - 'due to the presence of a Write Ahead Logging (WAL) file associated with it. Please ensure that the input ' - 'geopackage is not being used in any other software before continuing, and that the file ' - f"'{gpkg}-wal' is not present before continuing.", - ) - - input_layers = fiona.listlayers(gpkg) + engine = create_engine( + f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', + pool_pre_ping=True, + connect_args={ + 'keepalives': 1, + 'keepalives_idle': 30, + 'keepalives_interval': 10, + 'keepalives_count': 5, + }, + ) + db_contents = inspect(engine) G_proj = ox.load_graphml(graphml_proj) - - grid = gpd.read_file(gpkg, layer=population_grid) - grid.set_index('grid_id', inplace=True) - + with engine.connect() as connection: + grid = gpd.read_postgis( + population_grid, connection, index_col='grid_id', + ) print( '\nFirst pass node-level neighbourhood analysis (Calculate average population and intersection density ' 'for each intersection node in study regions, taking mean values from distinct grid cells within ' @@ -76,12 +69,15 @@ def main(): ) nh_startTime = time.time() # read from disk if exist - if 'nodes_pop_intersect_density' in input_layers: - print(' - Read population and intersection density from local file.') - gdf_nodes_simple = gpd.read_file( - gpkg, layer='nodes_pop_intersect_density', - ) - gdf_nodes_simple.set_index('osmid', inplace=True) + if db_contents.has_table('nodes_pop_intersect_density'): + print(' - Read population and intersection density from database.') + with engine.connect() as connection: + gdf_nodes_simple = gpd.read_postgis( + 'nodes_pop_intersect_density', + connection, + index_col='osmid', + geom_col='geometry', + ) else: print(' - Set up simple nodes') gdf_nodes = ox.graph_to_gdfs(G_proj, nodes=True, edges=False) @@ -164,12 +160,13 @@ def main(): ) gdf_nodes_simple = gdf_nodes_simple.join(result) # save in geopackage (so output files are all kept together) - gdf_nodes_simple.to_file( - gpkg, layer='nodes_pop_intersect_density', driver='GPKG', - ) + with engine.connect() as connection: + gdf_nodes_simple.to_postgis( + 'nodes_pop_intersect_density', connection, index='osmid', + ) print( 'Time taken to calculate or load city local neighbourhood statistics: ' - f'{(time.time() - 
nh_startTime)/60:02g} mins', + f'{(time.time() - nh_startTime)/60:.02f} mins', ) # Calculate accessibility to points of interest and walkability for sample points: # 1. using pandana packadge to calculate distance to access from sample @@ -192,8 +189,9 @@ def main(): print(f'\n\t- {analysis_key}') analysis = indicators['nearest_node_analyses'][analysis_key] layer_analysis_count = len(analysis['layers']) + gdf_poi_layers = {} for layer in analysis['layers']: - if layer in input_layers and layer is not None: + if db_contents.has_table(layer) and layer is not None: output_names = analysis['output_names'].copy() if layer_analysis_count > 1 and layer_analysis_count == len( analysis['output_names'], @@ -203,15 +201,18 @@ def main(): output_names[analysis['layers'].index(layer)], ] print(f'\t\t{output_names}') - gdf_poi = gpd.read_file( - analysis['geopackage'].format(gpkg=gpkg), layer=layer, - ) + if layer not in gdf_poi_layers: + with engine.connect() as connection: + gdf_poi_layers[layer] = gpd.read_postgis( + layer, connection, + ) distance_results[ f'{analysis}_{layer}' ] = cal_dist_node_to_nearest_pois( - gdf_poi, - accessibility_distance, - network, + gdf_poi_layers[layer], + geometry='geom', + distance=accessibility_distance, + network=network, category_field=analysis['category_field'], categories=analysis['categories'], filter_field=analysis['filter_field'], @@ -254,33 +255,28 @@ def main(): round(gdf_nodes_poi_dist, 0).replace(-999, np.nan).astype('Int64') ) # read sample points from disk (in city-specific geopackage) - samplePointsData = gpd.read_file(gpkg, layer='urban_sample_points') - # create 'grid_id' for sample point, if it not exists - if 'grid_id' not in samplePointsData.columns: - samplePointsData = spatial_join_index_to_gdf(samplePointsData, grid) - samplePointsData = filter_ids( - df=samplePointsData, - query=f"""grid_id not in {list(grid.query(f'pop_est < {population["pop_min_threshold"]}').index.values)}""", - message='Restrict sample points to those not located in grids with a population below ' - f"the minimum threshold value ({population['pop_min_threshold']})...", - ) - samplePointsData = filter_ids( - df=samplePointsData, + with engine.connect() as connection: + sample_points = gpd.read_postgis('urban_sample_points', connection) + sample_points.columns = [ + 'geometry' if x == 'geom' else x for x in sample_points.columns + ] + sample_points = filter_ids( + df=sample_points, query=f"""n1 in {list(gdf_nodes_simple.index.values)} and n2 in {list(gdf_nodes_simple.index.values)}""", message='Restrict sample points to those with two associated sample nodes...', ) - samplePointsData.set_index('point_id', inplace=True) + sample_points.set_index('point_id', inplace=True) distance_names = list(gdf_nodes_poi_dist.columns) # Estimate full distance to destinations for sample points full_nodes = create_full_nodes( - samplePointsData, + sample_points, gdf_nodes_simple, gdf_nodes_poi_dist, distance_names, population_density, intersection_density, ) - samplePointsData = samplePointsData[ + sample_points = sample_points[ ['grid_id', 'edge_ogc_fid', 'geometry'] ].join(full_nodes, how='left') # create binary access scores evaluated against accessibility distance @@ -288,8 +284,8 @@ def main(): access_score_names = [ f"{x.replace('nearest_node','access')}_score" for x in distance_names ] - samplePointsData[access_score_names] = binary_access_score( - samplePointsData, distance_names, accessibility_distance, + sample_points[access_score_names] = binary_access_score( + sample_points, 
distance_names, accessibility_distance, ) print('Calculating sample point specific analyses ...') # Defined in generated config file, e.g. daily living score, walkability index, etc @@ -304,33 +300,29 @@ def main(): ] axis = indicators['sample_point_analyses'][analysis][var]['axis'] if formula == 'sum': - samplePointsData[var] = samplePointsData[columns].sum( - axis=axis, - ) + sample_points[var] = sample_points[columns].sum(axis=axis) if formula == 'max': - samplePointsData[var] = samplePointsData[columns].max( - axis=axis, - ) + sample_points[var] = sample_points[columns].max(axis=axis) if formula == 'sum_of_z_scores': - samplePointsData[var] = ( - ( - samplePointsData[columns] - - samplePointsData[columns].mean() - ) - / samplePointsData[columns].std() + sample_points[var] = ( + (sample_points[columns] - sample_points[columns].mean()) + / sample_points[columns].std() ).sum(axis=1) # grid_id and edge_ogc_fid are integers - samplePointsData[samplePointsData.columns[0:2]] = samplePointsData[ - samplePointsData.columns[0:2] + sample_points[sample_points.columns[0:2]] = sample_points[ + sample_points.columns[0:2] ].astype(int) # remaining non-geometry fields are float - samplePointsData[samplePointsData.columns[3:]] = samplePointsData[ - samplePointsData.columns[3:] + sample_points[sample_points.columns[3:]] = sample_points[ + sample_points.columns[3:] ].astype(float) - print('Save to geopackage...') - # save the sample points with all the desired results to a new layer in geopackage - samplePointsData = samplePointsData.reset_index() - samplePointsData.to_file(gpkg, layer='samplePointsData', driver='GPKG') + print('Save to database...') + # save the sample points with all the desired results to a new layer in the database + sample_points = sample_points.set_geometry('geometry') + with engine.connect() as connection: + sample_points.to_postgis( + 'sample_point_indicators', connection, if_exists='replace', + ) endTime = time.time() - startTime print(f'Total time is : {endTime / 60:.2f} minutes') diff --git a/process/subprocesses/_project_setup.py b/process/subprocesses/_project_setup.py index 241fb83e..9777e1ff 100644 --- a/process/subprocesses/_project_setup.py +++ b/process/subprocesses/_project_setup.py @@ -162,7 +162,7 @@ def region_dictionary_setup(region, region_config, config, folder_path): region, region_config, 'urban_region', data_path, ) r['buffered_urban_study_region'] = buffered_urban_study_region - r['db'] = f'li_{region}_{r["year"]}'.lower() + r['db'] = region.lower() r['dbComment'] = f'Liveability indicator data for {region} {r["year"]}.' 
r['population'] = region_data_setup( region, region_config, 'population', data_path, diff --git a/process/subprocesses/setup_sp.py b/process/subprocesses/setup_sp.py index fb7f642e..5708733f 100644 --- a/process/subprocesses/setup_sp.py +++ b/process/subprocesses/setup_sp.py @@ -110,6 +110,7 @@ def create_pdna_net(gdf_nodes, gdf_edges, predistance=500): def cal_dist_node_to_nearest_pois( gdf_poi, + geometry, distance, network, category_field=None, @@ -125,6 +126,8 @@ def cal_dist_node_to_nearest_pois( ---------- gdf_poi: GeoDataFrame GeoDataFrame of destination point-of-interest + geometry: str + geometry column name distance: int the maximum search distance network: pandana network @@ -145,8 +148,8 @@ def cal_dist_node_to_nearest_pois( ------- GeoDataFrame """ - gdf_poi['x'] = gdf_poi['geometry'].apply(lambda x: x.x) - gdf_poi['y'] = gdf_poi['geometry'].apply(lambda x: x.y) + gdf_poi['x'] = gdf_poi[geometry].apply(lambda x: x.x) + gdf_poi['y'] = gdf_poi[geometry].apply(lambda x: x.y) if category_field is not None and categories is not None: # Calculate distances iterating over categories appended_data = [] From 0edcfbd531fa71769bc9ac630e9aaf6f45190f7f Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 29 Mar 2023 19:48:56 +1100 Subject: [PATCH 10/26] migrated analysis from using geopackage to using SQL as per #216 --- process/2_analyse_region.py | 5 +- ...lysis.py => _11_neighbourhood_analysis.py} | 10 +- ...{_13_aggregation.py => _12_aggregation.py} | 17 ++- process/subprocesses/setup_aggr.py | 115 ++++++++++-------- 4 files changed, 88 insertions(+), 59 deletions(-) rename process/subprocesses/{_12_neighbourhood_analysis.py => _11_neighbourhood_analysis.py} (95%) rename process/subprocesses/{_13_aggregation.py => _12_aggregation.py} (78%) diff --git a/process/2_analyse_region.py b/process/2_analyse_region.py index 56192bf2..31b0da04 100644 --- a/process/2_analyse_region.py +++ b/process/2_analyse_region.py @@ -109,9 +109,8 @@ '_08_destination_summary.py': 'Summarise spatial distribution', '_09_urban_covariates.py': 'Collate urban covariates', '_10_gtfs_analysis.py': 'Analyse GTFS Feeds', - '_11_export_gpkg.py': 'Export geopackage', - '_12_neighbourhood_analysis.py': 'Analyse neighbourhoods', - '_13_aggregation.py': 'Aggregate region summary analyses', + '_11_neighbourhood_analysis.py': 'Analyse neighbourhoods', + '_12_aggregation.py': 'Aggregate region summary analyses', } pbar = tqdm( study_region_setup, diff --git a/process/subprocesses/_12_neighbourhood_analysis.py b/process/subprocesses/_11_neighbourhood_analysis.py similarity index 95% rename from process/subprocesses/_12_neighbourhood_analysis.py rename to process/subprocesses/_11_neighbourhood_analysis.py index 24e48573..6e2420c4 100644 --- a/process/subprocesses/_12_neighbourhood_analysis.py +++ b/process/subprocesses/_11_neighbourhood_analysis.py @@ -318,10 +318,16 @@ def main(): ].astype(float) print('Save to database...') # save the sample points with all the desired results to a new layer in the database - sample_points = sample_points.set_geometry('geometry') + sample_points.columns = [ + 'geom' if x == 'geometry' else x for x in sample_points.columns + ] + sample_points = sample_points.set_geometry('geom') with engine.connect() as connection: sample_points.to_postgis( - 'sample_point_indicators', connection, if_exists='replace', + 'sample_point_indicators', + connection, + index=True, + if_exists='replace', ) endTime = time.time() - startTime print(f'Total time is : {endTime / 60:.2f} minutes') diff --git 
a/process/subprocesses/_13_aggregation.py b/process/subprocesses/_12_aggregation.py similarity index 78% rename from process/subprocesses/_13_aggregation.py rename to process/subprocesses/_12_aggregation.py index d498c38a..95bb51f6 100644 --- a/process/subprocesses/_13_aggregation.py +++ b/process/subprocesses/_12_aggregation.py @@ -17,21 +17,32 @@ # Set up project and region parameters for GHSCIC analyses from _project_setup import * +from geoalchemy2 import Geometry from setup_aggr import ( calc_cities_pop_pct_indicators, calc_grid_pct_sp_indicators, ) +from sqlalchemy import create_engine, inspect, text def main(): startTime = time.time() - + engine = create_engine( + f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', + pool_pre_ping=True, + connect_args={ + 'keepalives': 1, + 'keepalives_idle': 30, + 'keepalives_interval': 10, + 'keepalives_count': 5, + }, + ) print('Calculating small area neighbourhood grid indicators... '), # calculate within-city indicators weighted by sample points for each city # calc_grid_pct_sp_indicators take sample point stats within each city as # input and aggregate up to grid cell indicators by calculating the mean of # sample points stats within each hex - calc_grid_pct_sp_indicators(region_config, indicators) + calc_grid_pct_sp_indicators(engine, region_config, indicators) print('Done.') print('Calculating city summary indicators... '), @@ -43,7 +54,7 @@ def main(): # in addition to the population weighted averages, unweighted averages are # also included to reflect the spatial distribution of key walkability # measures (regardless of population distribution) - calc_cities_pop_pct_indicators(region_config, indicators) + calc_cities_pop_pct_indicators(engine, region_config, indicators) print('Done.') print( f'\nAggregation completed: {(time.time() - startTime)/60.0:.02f} mins', diff --git a/process/subprocesses/setup_aggr.py b/process/subprocesses/setup_aggr.py index 0f2a6138..d188182d 100644 --- a/process/subprocesses/setup_aggr.py +++ b/process/subprocesses/setup_aggr.py @@ -9,18 +9,18 @@ import pandas as pd -def calc_grid_pct_sp_indicators(region_dictionary, indicators): - """Caculate sample point weighted grid-level indicators within each city, and save to output geopackage. +def calc_grid_pct_sp_indicators(engine, region_config, indicators): + """Caculate sample point weighted grid-level indicators within each city. Parameters ---------- - region_dictionary: dict - gpkg: path of geopackage file with city resources for inputs and outputs + engine: sql connection + region_config: dict city_name: city full name - grid_summary_output: output name for CSV file and gpkg layer summarising grid results - e.g. {study_region}_grid_{resolution}m_yyyymmdd - city_summary_output: output name for CSV file and gpkg layer summarising city results - {study_region}_city_yyyymmdd + grid_summary_output: output name for CSV file and db layer summarising grid results + e.g. 
{study_region}_grid_{resolution}m + city_summary_output: output name for CSV file and db layer summarising city results + {study_region}_city indicators: dict output: dict sample_point_variables: list @@ -30,35 +30,39 @@ def calc_grid_pct_sp_indicators(region_dictionary, indicators): ------- String (indicating presumptive success) """ - gpkg = region_dictionary['gpkg'] - # read input geopackage with processed sample point and grid layer - gdf_samplepoint = gpd.read_file(gpkg, layer='samplePointsData') - gdf_samplepoint = gdf_samplepoint[ + # read sample point and grid layer + with engine.connect() as connection: + gdf_grid = gpd.read_postgis( + region_config['population_grid'], connection, index_col='grid_id', + ) + with engine.connect() as connection: + gdf_sample_points = gpd.read_postgis( + 'sample_point_indicators', connection, index_col='point_id', + ) + gdf_sample_points = gdf_sample_points[ ['grid_id'] + indicators['output']['sample_point_variables'] ] - gdf_samplepoint.columns = ['grid_id'] + indicators['output'][ + gdf_sample_points.columns = ['grid_id'] + indicators['output'][ 'neighbourhood_variables' ] - gdf_grid = gpd.read_file(gpkg, layer=region_dictionary['population_grid']) - # join urban sample point count to gdf_grid - samplepoint_count = gdf_samplepoint['grid_id'].value_counts() - samplepoint_count.name = 'urban_sample_point_count' - gdf_grid = gdf_grid.join(samplepoint_count, how='inner', on='grid_id') + sample_points_count = gdf_sample_points['grid_id'].value_counts() + sample_points_count.name = 'urban_sample_point_count' + gdf_grid = gdf_grid.join(sample_points_count, how='inner', on='grid_id') # perform aggregation functions to calculate sample point weighted grid cell indicators # to retain indicators which may be all NaN (eg cities absent GTFS data), numeric_only=False - gdf_samplepoint = gdf_samplepoint.groupby('grid_id').mean( + gdf_sample_points = gdf_sample_points.groupby('grid_id').mean( numeric_only=False, ) - gdf_grid = gdf_grid.join(gdf_samplepoint, how='left', on='grid_id') + gdf_grid = gdf_grid.join(gdf_sample_points, how='left', on='grid_id') # scale percentages from proportions pct_fields = [x for x in gdf_grid if x.startswith('pct_access')] gdf_grid[pct_fields] = gdf_grid[pct_fields] * 100 - gdf_grid['study_region'] = region_dictionary['name'] + gdf_grid['study_region'] = region_config['name'] grid_fields = ( indicators['output']['basic_attributes'] @@ -66,19 +70,23 @@ def calc_grid_pct_sp_indicators(region_dictionary, indicators): ) grid_fields = [x for x in grid_fields if x in gdf_grid.columns] - # save the gdf_grid to geopackage - gdf_grid[grid_fields + ['geometry']].to_file( - gpkg, layer=region_dictionary['grid_summary'], driver='GPKG', - ) + # save the grid indicators + with engine.connect() as connection: + gdf_grid[grid_fields + ['geom']].set_geometry('geom').to_postgis( + region_config['grid_summary'], + connection, + index=True, + if_exists='replace', + ) gdf_grid[grid_fields].to_csv( - f"{region_dictionary['region_dir']}/{region_dictionary['grid_summary']}.csv", + f"{region_config['region_dir']}/{region_config['grid_summary']}.csv", index=False, ) return 'Exported gridded small area summary statistics' -def calc_cities_pop_pct_indicators(region_dictionary, indicators): - """Calculate population-weighted city-level indicators, and save to output geopackage. +def calc_cities_pop_pct_indicators(engine, region_config, indicators): + """Calculate population-weighted city-level indicators. 
These indicators include: 'pop_pct_access_500m_fresh_food_markets', @@ -92,34 +100,37 @@ def calc_cities_pop_pct_indicators(region_dictionary, indicators): Parameters ---------- - region_dictionary: dict - gpkg: path of geopackage file with city resources for inputs and outputs + engine: sql connection + region_config: dict city_name: city full name - grid_summary_output: output name for CSV file and gpkg layer summarising grid results - e.g. {study_region}_grid_{resolution}m_yyyymmdd - city_summary_output: output name for CSV file and gpkg layer summarising city results - {study_region}_city_yyyymmdd + grid_summary_output: output name for CSV file and db layer summarising grid results + e.g. {study_region}_grid_{resolution}m + city_summary_output: output name for CSV file and db layer summarising city results + {study_region}_city indicators: dict - output: dict - sample_point_variables: list - neighbourhood_variables: list - extra_unweighted_vars: list - an optional list of variables to also calculate mean (unweighted) for + output: dict + sample_point_variables: list + neighbourhood_variables: list + extra_unweighted_vars: list + an optional list of variables to also calculate mean (unweighted) for Returns ------- String (indicating presumptive success) """ - gpkg = region_dictionary['gpkg'] - gdf_grid = gpd.read_file(gpkg, layer=region_dictionary['grid_summary']) - gdf_study_region = gpd.read_file(gpkg, layer='urban_study_region') - urban_covariates = gpd.read_file(gpkg, layer='urban_covariates') - + with engine.connect() as connection: + gdf_grid = gpd.read_postgis( + region_config['grid_summary'], connection, index_col='grid_id', + ) + with engine.connect() as connection: + gdf_study_region = gpd.read_postgis('urban_study_region', connection) + with engine.connect() as connection: + urban_covariates = pd.read_sql_table('urban_covariates', connection) # calculate the sum of urban sample point counts for city urban_covariates['urban_sample_point_count'] = gdf_grid[ 'urban_sample_point_count' ].sum() - urban_covariates['geometry'] = gdf_study_region['geometry'] + urban_covariates['geom'] = gdf_study_region['geom'] urban_covariates.crs = gdf_study_region.crs # Map differences in grid names to city names @@ -151,14 +162,16 @@ def calc_cities_pop_pct_indicators(region_dictionary, indicators): ) # order geometry as final column urban_covariates = urban_covariates[ - [x for x in urban_covariates.columns if x != 'geometry'] + ['geometry'] + [x for x in urban_covariates.columns if x != 'geom'] + ['geom'] ] - urban_covariates.to_file( - gpkg, layer=region_dictionary['city_summary'], driver='GPKG', - ) + urban_covariates = urban_covariates.set_geometry('geom') + with engine.connect() as connection: + urban_covariates.to_postgis( + region_config['city_summary'], connection, if_exists='replace', + ) urban_covariates[ - [x for x in urban_covariates.columns if x != 'geometry'] + [x for x in urban_covariates.columns if x != 'geom'] ].to_csv( - f"{region_dictionary['region_dir']}/{region_dictionary['city_summary']}.csv", + f"{region_config['region_dir']}/{region_config['city_summary']}.csv", index=False, ) From 833067a85dd820887a6e0b1ea4fad68bc4f75a86 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 29 Mar 2023 22:33:13 +1100 Subject: [PATCH 11/26] further migrated analysis from using geopackage to using SQL as per #216, updating the generate resources script (data export to geopackage and csv occurs then) --- process/2_analyse_region.py | 4 +- process/3_generate_resources.py | 48 
++++++++++++++++--- .../_11_neighbourhood_analysis.py | 5 +- process/subprocesses/_project_setup.py | 1 + process/subprocesses/_report_functions.py | 29 +++++++++++ process/subprocesses/setup_aggr.py | 2 +- 6 files changed, 76 insertions(+), 13 deletions(-) diff --git a/process/2_analyse_region.py b/process/2_analyse_region.py index 31b0da04..d8ed8c86 100644 --- a/process/2_analyse_region.py +++ b/process/2_analyse_region.py @@ -87,13 +87,13 @@ width=float('inf'), ) print_autobreak( - f'A dated copy of project and region parameters has been saved as {region_dir}/_parameters.yml.\n'.replace( + f'A dated copy of project and region parameters has been saved as {region_dir}/_parameters.yml.'.replace( '/home/ghsci/work/', '', ), ) print_autobreak( - f'Analysis time zone: {analysis_timezone} (to set time zone for where you are, edit config.yml)', + f'\nAnalysis time zone: {analysis_timezone} (to set time zone for where you are, edit config.yml)\n', ) start_analysis = time.time() print(f"Analysis start:\t{time.strftime('%Y-%m-%d_%H%M')}") diff --git a/process/3_generate_resources.py b/process/3_generate_resources.py index 99efd1ab..42101664 100644 --- a/process/3_generate_resources.py +++ b/process/3_generate_resources.py @@ -8,16 +8,27 @@ import shutil import sys -# import and set up functions -import subprocesses._report_functions as _report_functions from subprocesses._project_setup import ( codename, + db, + db_host, + db_pwd, + db_user, folder_path, + gtfs, indicators, policies, region_config, region_names, ) + +# import and set up functions +from subprocesses._report_functions import ( + generate_report_for_language, + get_and_setup_language_cities, + postgis_to_csv, + postgis_to_geopackage, +) from subprocesses._utils import get_terminal_columns, print_autobreak @@ -55,8 +66,33 @@ def main(): print(f" __{region_config['name']}__{codename}_processing_log.txt") print('\nData files') print(f" {os.path.basename(region_config['gpkg'])}") - print(f" {region_config['grid_summary']}.csv") - print(f" {region_config['city_summary']}.csv") + tables = [ + region_config['city_summary'], + region_config['grid_summary'], + region_config['point_summary'], + 'aos_public_osm', + 'dest_type', + 'destinations', + region_config['intersections_table'], + 'edges', + 'nodes', + ] + if region_config['gtfs_feeds'] is not None: + tables = tables + [gtfs['headway']] + postgis_to_geopackage( + region_config['gpkg'], db_host, db_user, db, db_pwd, tables, + ) + for layer in ['city', 'grid']: + print( + postgis_to_csv( + f" {region_config[f'{layer}_summary']}.csv", + db_host, + db_user, + db, + db_pwd, + region_config[f'{layer}_summary'], + ), + ) # Generate data dictionary print('\nData dictionaries') required_assets = [ @@ -70,14 +106,14 @@ def main(): ) print(f' {file}') # Generate reports - languages = _report_functions.get_and_setup_language_cities(config) + languages = get_and_setup_language_cities(config) if languages == []: print_autobreak( ' - Report generation skippped. 
Please confirm that city and its corresponding codename have been configured in the city details and language worksheets of configuration/_report_configuration.xlsx.', ) else: for language in languages: - _report_functions.generate_report_for_language( + generate_report_for_language( config, language, indicators, policies, ) print_autobreak( diff --git a/process/subprocesses/_11_neighbourhood_analysis.py b/process/subprocesses/_11_neighbourhood_analysis.py index 6e2420c4..d469400a 100644 --- a/process/subprocesses/_11_neighbourhood_analysis.py +++ b/process/subprocesses/_11_neighbourhood_analysis.py @@ -324,10 +324,7 @@ def main(): sample_points = sample_points.set_geometry('geom') with engine.connect() as connection: sample_points.to_postgis( - 'sample_point_indicators', - connection, - index=True, - if_exists='replace', + point_summary, connection, index=True, if_exists='replace', ) endTime = time.time() - startTime print(f'Total time is : {endTime / 60:.2f} minutes') diff --git a/process/subprocesses/_project_setup.py b/process/subprocesses/_project_setup.py index 9777e1ff..15d28249 100644 --- a/process/subprocesses/_project_setup.py +++ b/process/subprocesses/_project_setup.py @@ -187,6 +187,7 @@ def region_dictionary_setup(region, region_config, config, folder_path): r[ 'gpkg' ] = f'{r["region_dir"]}/{r["study_region"]}_{study_buffer}m_buffer.gpkg' + r['point_summary'] = f'{r["study_region"]}_sample_points' r['grid_summary'] = f'{r["study_region"]}_grid_{resolution}' r['city_summary'] = f'{r["study_region"]}_region' if 'policy_review' in r: diff --git a/process/subprocesses/_report_functions.py b/process/subprocesses/_report_functions.py index 2c490df9..dd5416f6 100644 --- a/process/subprocesses/_report_functions.py +++ b/process/subprocesses/_report_functions.py @@ -5,6 +5,7 @@ """ import json import os +import subprocess as sp import time from textwrap import wrap @@ -27,6 +28,34 @@ from subprocesses.batlow import batlow_map as cmap +def postgis_to_csv(file, db_host, db_user, db, db_pwd, table): + """Export table from PostGIS database to CSV.""" + command = ( + f'ogr2ogr -f "CSV" {file} ' + f'PG:"host={db_host} user={db_user} dbname={db} password={db_pwd}" ' + f' {table} ' + ) + sp.call(command, shell=True) + return file + + +def postgis_to_geopackage(gpkg, db_host, db_user, db, db_pwd, tables): + """Export selection of tables from PostGIS database to geopackage.""" + try: + os.remove(gpkg) + except FileNotFoundError: + pass + + for table in tables: + print(f' - {table}') + command = ( + f'ogr2ogr -update -overwrite -lco overwrite=yes -f GPKG {gpkg} ' + f'PG:"host={db_host} user={db_user} dbname={db} password={db_pwd}" ' + f' {table} ' + ) + sp.call(command, shell=True) + + def get_and_setup_language_cities(config): """Setup and return languages for given configuration.""" if config.auto_language: diff --git a/process/subprocesses/setup_aggr.py b/process/subprocesses/setup_aggr.py index d188182d..e5d18366 100644 --- a/process/subprocesses/setup_aggr.py +++ b/process/subprocesses/setup_aggr.py @@ -37,7 +37,7 @@ def calc_grid_pct_sp_indicators(engine, region_config, indicators): ) with engine.connect() as connection: gdf_sample_points = gpd.read_postgis( - 'sample_point_indicators', connection, index_col='point_id', + region_config['point_summary'], connection, index_col='point_id', ) gdf_sample_points = gdf_sample_points[ ['grid_id'] + indicators['output']['sample_point_variables'] From b60f1491f3c5a3e3a11b78e4f7ae0ed39e5ed4f1 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Thu, 30 
Mar 2023 00:02:25 +1100
Subject: [PATCH 12/26] rewrote network generation/analysis to use SQL instead
 of read/write to graphml; we were already writing unprojected nodes/edges to
 the geodatabase --- by instead writing the projected ones, we could reload
 these in the network analysis step via graph_from_gdfs using OSMnx and avoid
 reading/writing graphml as an extra step. This addresses #216. There is an
 oddity -- while end results are basically similar (some slight variation in
 walkability), the number of edges (presumably returned from graph_from_gdfs)
 was approximately halved for Las Palmas, consequently the sample points are
 approximately halved too. More exploration required to understand why this is
 the case, but perhaps it's to do with network directionality... In any case -
 results are mostly the same while things run faster/more efficiently/less
 storage space. So retaining code using this commit and then will explore.
---
 process/2_analyse_region.py | 2 +-
 .../_03_create_network_resources.py | 86 +++++++++----------
 .../_11_neighbourhood_analysis.py | 15 ++--
 3 files changed, 54 insertions(+), 49 deletions(-)

diff --git a/process/2_analyse_region.py b/process/2_analyse_region.py
index d8ed8c86..9e1736ae 100644
--- a/process/2_analyse_region.py
+++ b/process/2_analyse_region.py
@@ -93,7 +93,7 @@
 )
 print_autobreak(
-    f'\nAnalysis time zone: {analysis_timezone} (to set time zone for where you are, edit config.yml)\n',
+    f'\nAnalysis time zone: {analysis_timezone} (to set time zone for where you are, edit config.yml)\n\n',
 )
 start_analysis = time.time()
 print(f"Analysis start:\t{time.strftime('%Y-%m-%d_%H%M')}")
diff --git a/process/subprocesses/_03_create_network_resources.py b/process/subprocesses/_03_create_network_resources.py
index 5e8e839a..203f689c 100644
--- a/process/subprocesses/_03_create_network_resources.py
+++ b/process/subprocesses/_03_create_network_resources.py
@@ -23,12 +23,20 @@
 from tqdm import tqdm
 
 
+def gdf_to_postgis_format(gdf, engine, table, rename_geometry='geom'):
+    """Sets geometry with optional new name (e.g. 'geom') and writes to PostGIS, returning the reformatted GeoDataFrame."""
+    gdf.columns = [
+        rename_geometry if x == 'geometry' else x for x in gdf.columns
+    ]
+    gdf = gdf.set_geometry(rename_geometry)
+    with engine.connect() as connection:
+        gdf.to_postgis(
+            table, connection, index=True,
+        )
+
+
 def derive_routable_network(
-    engine,
-    network_study_region,
-    graphml,
-    osmnx_retain_all,
-    network_polygon_iteration,
+    engine, network_study_region, osmnx_retain_all, network_polygon_iteration,
 ):
     print(
         'Creating and saving pedestrian roads network... 
', end='', flush=True, @@ -86,7 +94,6 @@ def derive_routable_network( # induce a subgraph on those nodes G = nx.MultiDiGraph(G.subgraph(nodes)) - ox.save_graphml(G, filepath=graphml, gephi=False) print('Done.') return G @@ -96,7 +103,6 @@ def main(): start = time.time() script = os.path.basename(sys.argv[0]) task = 'Create network resources' - engine = create_engine( f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', future=True, @@ -141,59 +147,53 @@ def main(): sys.exit( "Please ensure the osmnx_retain_all has been defined for this region with values of either 'False' or 'True'", ) - if os.path.isfile(graphml): + if db_contents.has_table('nodes') and db_contents.has_table('edges'): print( f'Network "pedestrian" for {network_study_region} has already been processed.', ) print('\nLoading the pedestrian network.') - G = ox.load_graphml(graphml) + with engine.connect() as connection: + nodes = gpd.read_postgis( + 'nodes', connection, index_col='osmid', + ) + with engine.connect() as connection: + edges = gpd.read_postgis( + 'edges', connection, index_col=['u', 'v', 'key'], + ) + G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None) else: G = derive_routable_network( engine, network_study_region, - graphml, network['osmnx_retain_all'], network['polygon_iteration'], ) - - if not ( - db_contents.has_table('nodes') and db_contents.has_table('edges') - ): + print(' - Remove unnecessary key data from edges') + att_list = { + k + for n in G.edges + for k in G.edges[n].keys() + if k not in ['osmid', 'length'] + } + capture_output = [ + [d.pop(att, None) for att in att_list] + for n1, n2, d in tqdm(G.edges(data=True), desc=' ' * 18) + ] + del capture_output + print(' - Project graph') + G_proj = ox.project_graph(G, to_crs=crs['srid']) + if G_proj.is_directed(): + G_proj = G_proj.to_undirected() print( - f'\nPrepare and copy nodes and edges to postgis in project CRS {crs["srid"]}... ', + ' - Save projected graph edges and node GeoDataFrames to PostGIS', ) - nodes, edges = ox.graph_to_gdfs(G) - with engine.connect() as connection: - nodes.rename_geometry('geom').to_crs(crs['srid']).to_postgis( - 'nodes', connection, index=True, - ) - edges.rename_geometry('geom').to_crs(crs['srid']).to_postgis( - 'edges', connection, index=True, - ) - + nodes, edges = ox.graph_to_gdfs(G_proj) + gdf_to_postgis_format(nodes, engine, 'nodes') + gdf_to_postgis_format(edges, engine, 'edges') if not db_contents.has_table(intersections_table): ## Copy clean intersections to postgis print('\nPrepare and copy clean intersections to postgis... ') # Clean intersections - if not os.path.exists(graphml_proj): - print(' - Remove unnecessary key data from edges') - att_list = { - k - for n in G.edges - for k in G.edges[n].keys() - if k not in ['osmid', 'length'] - } - capture_output = [ - [d.pop(att, None) for att in att_list] - for n1, n2, d in tqdm(G.edges(data=True), desc=' ' * 18) - ] - del capture_output - G_proj = ox.project_graph(G, to_crs=crs['srid']) - if G_proj.is_directed(): - G_proj = G_proj.to_undirected() - ox.save_graphml(G_proj, filepath=graphml_proj, gephi=False) - else: - G_proj = ox.load_graphml(graphml_proj) intersections = ox.consolidate_intersections( G_proj, tolerance=network['intersection_tolerance'], diff --git a/process/subprocesses/_11_neighbourhood_analysis.py b/process/subprocesses/_11_neighbourhood_analysis.py index d469400a..0c386a8a 100644 --- a/process/subprocesses/_11_neighbourhood_analysis.py +++ b/process/subprocesses/_11_neighbourhood_analysis.py @@ -1,11 +1,10 @@ """ Neighbourhood analysis. 
-This script creates neighbourhood indicators for sample points. +This script creates neighbourhood indicators for sample points. To run it, supply a study region code name. + +It assumes network projected network nodes and edges have been generated and stored in a PostGIS database, which can be read from as a GeoDataFrame to generate a graph. -To run it, supply a study region code name. The list of configured codenames is displayed -if run with no region name as an argument. -It is to be run after 01_study_region_setup.py, which collates and processes the required data. Once run, the sample points may be aggregated to a neighbourhood small area grid and for overall city summaries by running 03_aggregation.py. @@ -57,7 +56,13 @@ def main(): }, ) db_contents = inspect(engine) - G_proj = ox.load_graphml(graphml_proj) + with engine.connect() as connection: + nodes = gpd.read_postgis('nodes', connection, index_col='osmid') + with engine.connect() as connection: + edges = gpd.read_postgis( + 'edges', connection, index_col=['u', 'v', 'key'], + ) + G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None) with engine.connect() as connection: grid = gpd.read_postgis( population_grid, connection, index_col='grid_id', From d856165fa809514d321ef86df1a5d4d4d49e7ba2 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Fri, 31 Mar 2023 09:55:48 +1100 Subject: [PATCH 13/26] updated data dictionary (long form, rather than columns for scale) --- .../assets/output_data_dictionary.csv | 76 ++++++++++-------- .../assets/output_data_dictionary.xlsx | Bin 13964 -> 13160 bytes 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/process/configuration/assets/output_data_dictionary.csv b/process/configuration/assets/output_data_dictionary.csv index 5150e8db..5c84bbd3 100644 --- a/process/configuration/assets/output_data_dictionary.csv +++ b/process/configuration/assets/output_data_dictionary.csv @@ -1,35 +1,41 @@ -Type of measure,Measure or data item description,City data,Grid data -Study region information,Continent,Continent,Continent -Study region information,Country,Country,Country -Study region information,2-letter country code,ISO 3166-1 alpha-2,ISO 3166-1 alpha-2 -Study region information,Study region,City,study_region -Derived study region statistics,"Area (km²; accounting for urban restrictions, if applied)",Area (sqkm),Area (sqkm) -Derived study region statistics,"Population estimate, as per configured population data source",Population estimate,Population estimate -Derived study region statistics,Population per km²,Population per sqkm,Population per sqkm -Derived study region statistics,Intersection count (following consolidation based on intersection tolerance parameter in region configuration),Intersections,Intersections -Derived study region statistics,Intersections per km²,Intersections per sqkm,Intersections per sqkm -,Linked urban covariates (e.g. 
estimates for 2015 from GHSL UCDB on air pollution),, -Linked covariates,"Total emission of CO 2 from the transport sector, using non-short-cycle-organic fuels in 2015",E_EC2E_T15,E_EC2E_T15 -Linked covariates,"Total emission of CO 2 from the energy sector, using short-cycle-organic fuels in 2015",E_EC2O_T15,E_EC2O_T15 -Linked covariates,Total emission of PM 2.5 from the transport sector in 2015,E_EPM2_T15,E_EPM2_T15 -Linked covariates,Total concertation of PM 2.5 for reference epoch 2014,E_CPM2_T14,E_CPM2_T14 -Analytical statistic,Sample points used in this analysis (generated along pedestrian network for populated grid areas),urban_sample_point_count,urban_sample_point_count -,"Access (for city, population percentage /100; for grid, an access score /1) within 500 metres to:",, -Indicator estimates,fresh food market / supermarket (source: OpenStreetMap or custom),pop_pct_access_500m_fresh_food_market_score,access_500m_fresh_food_market_score -Indicator estimates,convenience store (source: OpenStreetMap or custom),pop_pct_access_500m_convenience_score,access_500m_convenience_score -Indicator estimates,public transport (source: OpenStreetMap or custom),pop_pct_access_500m_pt_osm_any_score,access_500m_pt_osm_any_score -Indicator estimates,any public open space (source: OpenStreetMap),pop_pct_access_500m_public_open_space_any_score,access_500m_public_open_space_any_score -Indicator estimates,public open space larger than 1.5 hectares (source: OpenStreetMap),pop_pct_access_500m_public_open_space_large_score,access_500m_public_open_space_large_score -Indicator estimates,public transport (source: GTFS),pop_pct_access_500m_pt_gtfs_any_score,access_500m_pt_gtfs_any_score -Indicator estimates,public transport with average daytime weekday service frequency of 30 minutes or better (source: GTFS),pop_pct_access_500m_pt_gtfs_freq_30_score,access_500m_pt_gtfs_freq_30_score -Indicator estimates,public transport with average daytime weekday service frequency of 20 minutes or better (source: GTFS),pop_pct_access_500m_pt_gtfs_freq_20_score,access_500m_pt_gtfs_freq_20_score -Indicator estimates,any public transport stop (source: GTFS or OpenStreetMap/custom),pop_pct_access_500m_pt_any_score,access_500m_pt_any_score -Indicator estimates,Average walkable neighbourhood poulation density (population weighted) *,pop_nh_pop_density, -Indicator estimates,Average walkable neighbourhood intersection density (population weighted) *,pop_nh_intersection_density, -Indicator estimates,Average daily living score (population weighted) *,pop_daily_living, -Indicator estimates,Average walkability (population weighted) *,pop_walkability, -Indicator estimates,Average walkable neighbourhood poulation density ,local_nh_population_density,local_nh_population_density -Indicator estimates,Average walkable neighbourhood intersection density ,local_nh_intersection_density,local_nh_intersection_density -Indicator estimates,Average daily living score ,local_daily_living,local_daily_living -Indicator estimates,Average walkability ,local_walkability,local_walkability -,,, +Category,Description,Variable,Scale +Study region information,Continent,Continent,city +Study region information,Country,Country,city +Study region information,2-letter country code,ISO 3166-1 alpha-2,city +Study region information,Study region,City,"city, grid" +Derived study region statistics,"Area (km²; accounting for urban restrictions, if applied)",Area (sqkm),"city, grid" +Derived study region statistics,"Population estimate, as per configured population data 
source",Population estimate,"city, grid" +Derived study region statistics,Population per km²,Population per sqkm,"city, grid" +Derived study region statistics,Intersection count (following consolidation based on intersection tolerance parameter in region configuration),Intersections,"city, grid" +Derived study region statistics,Intersections per km²,Intersections per sqkm,"city, grid" +Linked covariates,"Total emission of CO 2 from the transport sector, using non-short-cycle-organic fuels in 2015",E_EC2E_T15,city +Linked covariates,"Total emission of CO 2 from the energy sector, using short-cycle-organic fuels in 2015",E_EC2O_T15,city +Linked covariates,Total emission of PM 2.5 from the transport sector in 2015,E_EPM2_T15,city +Linked covariates,Total concertation of PM 2.5 for reference epoch 2014,E_CPM2_T14,city +Analytical statistic,Sample points used in this analysis (generated along pedestrian network for populated grid areas),urban_sample_point_count,"city, grid" +Indicator estimates,Score (/1) for access within 500 m to a fresh food market / supermarket (source: OpenStreetMap or custom),access_500m_fresh_food_market_score,grid +Indicator estimates,Score (/1) for access within 500 m to a convenience store (source: OpenStreetMap or custom),access_500m_convenience_score,grid +Indicator estimates,Score (/1) for access within 500 m to a public transport (source: OpenStreetMap or custom),access_500m_pt_osm_any_score,grid +Indicator estimates,Score (/1) for access within 500 m to a any public open space (source: OpenStreetMap),access_500m_public_open_space_any_score,grid +Indicator estimates,Score (/1) for access within 500 m to a public open space larger than 1.5 hectares (source: OpenStreetMap),access_500m_public_open_space_large_score,grid +Indicator estimates,Score (/1) for access within 500 m to a public transport (source: GTFS),access_500m_pt_gtfs_any_score,grid +Indicator estimates,Score (/1) for access within 500 m to a public transport with average daytime weekday service frequency of 30 minutes or better (source: GTFS),access_500m_pt_gtfs_freq_30_score,grid +Indicator estimates,Score (/1) for access within 500 m to a public transport with average daytime weekday service frequency of 20 minutes or better (source: GTFS),access_500m_pt_gtfs_freq_20_score,grid +Indicator estimates,Score (/1) for access within 500 m to a any public transport stop (source: GTFS or OpenStreetMap/custom),access_500m_pt_any_score,grid +Indicator estimates,Percentage of population with access within 500 m to a fresh food market / supermarket (source: OpenStreetMap or custom),pop_pct_access_500m_fresh_food_market_score,city +Indicator estimates,Percentage of population with access within 500 m to a convenience store (source: OpenStreetMap or custom),pop_pct_access_500m_convenience_score,city +Indicator estimates,Percentage of population with access within 500 m to a public transport (source: OpenStreetMap or custom),pop_pct_access_500m_pt_osm_any_score,city +Indicator estimates,Percentage of population with access within 500 m to a any public open space (source: OpenStreetMap),pop_pct_access_500m_public_open_space_any_score,city +Indicator estimates,Percentage of population with access within 500 m to a public open space larger than 1.5 hectares (source: OpenStreetMap),pop_pct_access_500m_public_open_space_large_score,city +Indicator estimates,Percentage of population with access within 500 m to a public transport (source: GTFS),pop_pct_access_500m_pt_gtfs_any_score,city +Indicator estimates,Percentage of 
population with access within 500 m to a public transport with average daytime weekday service frequency of 30 minutes or better (source: GTFS),pop_pct_access_500m_pt_gtfs_freq_30_score,city +Indicator estimates,Percentage of population with access within 500 m to a public transport with average daytime weekday service frequency of 20 minutes or better (source: GTFS),pop_pct_access_500m_pt_gtfs_freq_20_score,city +Indicator estimates,Percentage of population with access within 500 m to a any public transport stop (source: GTFS or OpenStreetMap/custom),pop_pct_access_500m_pt_any_score,city +Indicator estimates,Average walkable neighbourhood poulation density (population weighted) ,pop_nh_pop_density,city +Indicator estimates,Average walkable neighbourhood intersection density (population weighted) ,pop_nh_intersection_density,city +Indicator estimates,Average daily living score (population weighted),pop_daily_living,city +Indicator estimates,Average walkability (population weighted) ,pop_walkability,city +Indicator estimates,Average walkable neighbourhood poulation density ,local_nh_population_density,"city, grid" +Indicator estimates,Average walkable neighbourhood intersection density ,local_nh_intersection_density,"city, grid" +Indicator estimates,Average daily living score ,local_daily_living,"city, grid" +Indicator estimates,Average walkability ,local_walkability,"city, grid" diff --git a/process/configuration/assets/output_data_dictionary.xlsx b/process/configuration/assets/output_data_dictionary.xlsx index 845a4eebca74b9c3ac284d80bb5ba4f916e2c1be..bafe6f5820d3b806937838bfc46c82ff668f9f14 100644 GIT binary patch delta 5912 zcmZ8lbyyV4)`uk|q*JF!!ex|NV_lul`hT|fyXF7Nx@ z?|bk4=8t*iIrBSnV&=?q&WU$Eu&w`zi4B&Q2hX9Qph&>(FsZc#*VWsGEpT(ii;=i_!<6TB4C4_-aH-_OD>-k_m~xAQrc z{vK|yHk?Zv7FY+*;rq-RH_7+vIPomuDsdTVS1k{EIIXn4?GyJ*B8<`IyAGI8$~yC~ zcPPJP2p5(|4K{2vRkL^Y$1zqO*q$ziwo1uJ)<;$D6MYq2=I>bbGER=hTwS-ATExD- zK@JH&o%AAh3Kp%hDcywDeS0Qj6gW$NJJb?+)=78_8r%d6vMdn)j5(yCGsJ#?Ro^nF zJ>W#0(7*0ysL|vmLPx;t*CE+fra5(na)`@WA%zVvYt5vN6%`(Yt1hGHTVGCpitAw~ z#*3dLpQ#qTgCrYOkG^y4Q(NeGsYbm9$_|rA>O>);6U%|Jlg!8pBO!T3Zow+hpCI+SrJZv!ikmCnwICno>l~dU_wMq`_(JT zbkTV=L=^#aHUoDK z<1T2igRRry$;!;f&Mx1#Ja?~drR=rVN)2C;8G0)RfK53V>ucC-UnsM z0|Lf`9py)ER&#_9khZa&pUOVEHni;h@+>%MhHrq7z**M!hf>uUc7v@9MW>Hss~pAB z&A6Nkp}36iUcuLioYm}o!#qE6i{I-p_erM`disJU_F1f?kheHmpgti#j2jaPH+GeWveP4n#oQ+f5TzElpLc|*UH zuMD-eo*~9=TMx00=+WLKU@vTQXgfkjzL>!_Fv(!^0{D#x9995Mod;tY(-RaFc1{=_ z9s_tKF`JJt3c>x0HW?u%CtQ%Ms~~)$6sY;j?<9lKpf_WA(n9ywngn5AZAw*w9>B@! zVdK^+$?l3>)98st))*Ue&o&V*6&vollfe1O(Q+QKGW9ZPQyQN2&NIRG{7+yJ38m&a zve1vo(+BZ7gH4r3@ASe7mLWhvQEesGie7Nxaz}@*Oq?mpI}v183^#}CaAtCq5oH69 zSVXFIAZmh`9O>7u1z?jiJv008wQv%hlgpZk5__VLTS6jJQYi-ErAaHVxjHIeAYk;G zwz;Gr)SfL?m4^o|=%$JyPC#y#eBt`=ClG?LkK4=NrmfvFV(4>j$>R z;H^g{)Q?~twJ4Vcnp*GZQ?9ed1wC6N=oM@&_NH5k3;Nh_u{(QF6m3&KdADH=;ug(w zOHVB=7$5Fa*N3RD-c<7zAZQYQ>u$`ZN``23`F+PT7qXK+i92bASl&jg%ze{;D_JKaz~l7uJGwhX)Cg7Pbu&7`g@c?;>emT` zqHI)wQfy8%k!`LM@@HFe4l!RbfOe9)h(F5Wemk6Pp3^&J=I!UxjlUvGQ4DT%I39Y? 
zejP!)BKz#l=I z_n1$2i_I>L_lwT+3b$8uXEr-BZ(lw;R4nOvH4iz&Bqta|ak5|1JBw(i^~Q^C zQit%xNTQ_kgvs-gIQM`2PJ>omXk*m;(60pX<6Iryil?*2W(mlg2G@UQcM2718aFQ; zCWXI#z679n$4f$`PhPvB?$q0<%(k3n7IilJVw37^=n+v`0x4(PsmN{H zHH`vpH5GrMyEmJ$+}Kn7SW@FF36T2;xx&v!qEwa}9#FuTlYh6vOcq-?&C9LB`elCr ze=pTr+j^}MQIlaXtI_7$Q=6CD9t3RWRufMRCsk(h+|1|_%c|YmA|RSvIS@}43J&2H zqUYVpnU)m&yCAzVDSV0`=!{;F9W$L3UXHkIdQ)LR1nZql01Z!p)JTPw@cq_@P%Lpv zhBkR9-IFUDb$+hggecMPjNxjrA0oVwkHA_^mg3o^;bAb+4+SoZ2JZ#g#s1Qo9-(`Z zucdBV36D*HihjolS5s2tKCo7g2pG3rn{qu9+;$qUuIAKF)tgona`=)cvnpSmIiMye zPA%mwPlKLInu4FdprFo7CqKfIFgd72GxJ22U81u)bO8+7)4h`N-n+z!*@JjfC<{R*RZ0o0c6KWwCtV5; zM4hYxpIK^)Wp{mn>>11azFG)R&cpQe6jm;7bSG0^x4{xWjwLpEWtz#U zog=Y2p_oL0Ko0ZlvgWnAr+&0`0Z*jV&i3m1GDy*xL_UgiGX6pWCe$zUO2s#c{ORXK z_CE)I`mF0=8(;&9^&M*%Du&k^QspMVuQ6(&kNS4t9G)F;Ii+2muMX;l2hlU>rRH!R zOzSEnz0tS$+ca!kV^%vHaWAsj?HS$0zdWPc2hbnM497PC|E~~%-==n9aP%9~T_O`! zyRz5&Gr9_|?`B8;4gK3tx}@4ZTZjGsV2}KX{%;ji>o>cVCP}>xxJuYR)Y1O!|NoQn zpRm6yQKA?~WC#SD?m20vozq&^|K*CoBG0k{62~&HJci58T{F~g!nOSK0N3A2jg3KP^%MzBD6p{! zXQKx6Mcm)wev(g=&?0=u^)y{pSusz+PYAI;9=mBD^4-5+4OzoVbg8J#gkoP#Nyb-< z>-O%5_vY6M*u`o(f8fE=hqC#-0XyD`2j4K&P9)C3@k%jK-&d1#y(r7-bjfwCVEC0; zRqp2<-BWs#D{e_OUVN~eo@8vY>XVZN8v2+2B zZkM@me_W;g?Doyzvg&-jGI;Q2eLu{b7i*RhI;Q~?$S4R2>V|)AC!W;YFVRDYEIX?D zgb*f>x7ElZ;jm=3$toOE**3*9=L6u?)Xqq#8;=XtYW@1IQx*Q zfw?)rzUZ_$fREj8Eld5D#gDj!RDv1&Zd-G@KF)$9;y#M+PF^Pj8+_MQNkdxzqGl-f z9KA`!yW022H*fI}WQ7_*urJ78;_F9B?e`NlwA1^Hd{uRVCd`N*6CEFO7<^3$yp2}4 z`HTQ*G0^KVYv~5rDHj6+)|_VY;QhCFc8;9(<pH`0K4SHV+pL$;2Sg*;|N{}th zwLQPhH}>UfxazE}DbHW0W|hxh-5qnhNGv6AYJGpu zXZGY(eJeosQvvIehi5l9bF=?i3b?2zWBKbX{ZaxyP(GYs8|>R6bjCi9NjmH+6z4JJG*U~i(u=@clQ}a_}wj&3osgauUq>VZ4VXS{i(v*InSPS}dgQ~kkurCyy zXLgl+GGaz+jZLR2{@Zy7t!Zf0u6H1i_n$c7*C3UA><+z2*&4%Pq42yxEz546=a|9=190WJ%6W>Wf`^^o8p4 zQv#?_nJg@fohQ1`%nQUeB8c5C|C0^0Y*oDQoBt8AV1cRDG8an--G9=G@ko4QY9HaqTcT&s=5^FO22)8(V2*X5_nCyV$A~1C{HlLg^)(JrKTx#+ z489L}!{YZ&zE)B#e5g0o=mwhR-(H)=Q9>>jFrBCPJ=1M#RWa${Jngq&gE(`($9Wp3 zUDGepq8XojkLW~(Qr)3azx5cqk2|zEKy9Wn&B;>c!io(E$=W9&WF{t;bw$y8*;jp= zu`6fQq>sHn6w%NHVxL{ zDEyo>qv=|V)6I1$qHSzQky<3ClH#^YZf@p>fh{u_U)?gu>|z$L?{9KoA?= zc-xgfKU!153)3Ivy1`E*f&P7fc1tF0iSgMDtTwU_vXn113ILbp8~)s*?(OQ9^5c}8 zehri88|-_rH&`6hG43w@2Tkd+`FvBug&5nfUA_?+k%uia14~12W_okLb6PUeD)Wq< z^>Mte0mF_li3G{688*)dz6~IG#>jX`QOU@uDpx5qk~A!4EO`1Qb%MEQUVPaZ-%<{w zVPm&f0$`R9rEDo`a+SHY-ojd7MX?t_jKVE|h60(|EhKKMB@U6k*sKh8Hwr~8wcsv6 zAzO0duN1>DLMi|Sqmn&1JhaQ8vUQkTyQ=E^)T;{TlSu0NaIJKL9=s;wW%Ht*dj6|E z?(#{kNSnE`vU;ykVQw3u>c034?H+4SP zB;{VXP!9w!Er;3`EA@ltzPD#SR=V?%Jy&e z0v3wHSE4086fTR<<)`4Uu@~4qR;dlzVeKS*Oiu#yBi9gE-OqLKtMOGOc*_4guy@`2KCPSe zNzGkwya>48@_T8rzIu8Jg$EDGQl=0%kw?}bW6pJAcaNgY$6LaVQA}IB@7$Y~<}ac@ z??)V-62|$MT}Mtg35ZCeSKjyAx1!Y2dLf^?IlT!C=)_?SwJ0BY-JriA>e~vf+dbXg zh{ABZe2FGh)^cxqXDVuP8wt9mfNf%uHukarP;t`3S7Q7hOC=ow*aRyOe4PGCt)Gu@ zP+FqUp#tj}p%Vu)1EX%mO9paG1Jp`ox&j5+rwXC+L-pP^K;FIxK7J%tyvloo78H|I zfY2az>YV;&^74jaXyl7%FPqQDgy&2@-D8z=K6dp{B^sD*V^SPT42l1cZ=Y7D#EiSD zEZ?+RJ#2oWflTI@5Z7$(2Zxl-ZSiC+H4X3;Br?6$&8AIMqzHYo7u;fISWcEibA8I# z{?lCrlgN^p`9gSr7m^GjWdhlMSmI3`KJGgurDV!pMDmH>D2Umx(KRI0OqkO>zzw?q_MH(<)Q&VnysK0Z1&!xsgHba+sLyZ zAOBcnuJU{ZF{Z4+J98ViK%Q!lJ(1-HM5Mg*wGoeIOV=A=d&(axWuF$x$|r$-&v%GE<+8aj}T z_`60$L(x!1Lx=sq#KB<3kJ3?W+~s0N1*=E!kw$K)-V&4`rU7&9mU zMm;EPh$_moL#|w}!Z+sj>jg(y<%|7D(@aqt z2ZDqnuXqU?T+oN&K-bZk#TMu3!QiLq(gTR>^7Ni6vnm&T8jUn8wRTKf$K0BX+ODAn z4wZ2BZI?(g0iW%bZlLTc{bWvhm8{hz873drRAiA{B{@FQpKa8U?*yF_`M&%8oTG3a*5!t4585sDE^NK{GoHc31YZH%NnxRatknO%1o#Jrgnbrdp!p{QK|vw?XY+BDMGp(3 zCx@j962h2-r~oG{uqhT+9t~ww0PqosiGuZrVMO^S1|Qp5|HY4Bh0U;X!rFugY5sl{ 
nkAZ3fKtUn;FHj9TY)4260OW!(3rhm7d0-a8te8iefP>dO$L{S~P78@$4|tg<9jL#1tm$W8SB;7g|A;j5tIa8^5M_!GO~)vM)w}5YAMchOZZ*2YmiRmL9Ex}c_ouR&W#_*x}!*H8>LIC*H122 zLfFU-PU&YvgbQ+nt%Hg;(WH`4cZm=`EMH%6JZl&yO9nzV;*-TRezwzrj{Kv4dUc!` zwMJj|+hAr*+A##I_!A6@yH;El_VX9_JklEC)f`%b3zfC%wM}1*2Pvu#T(T(7#{M=~ zF{JdU3^JUTCQ=eCjdX2V*MBorTev~^MXXKQ(mU0up^xy(3vK8Lx)Nc9Y!Lo)$Ya5l z??J7_FK58v&C?+>(+AyOoF^o*$rv|k3#NLu)TP0g*v!I4AXU)?^2?{UG4Zh2QLAVp zoK;i2k^!o7TdlD{*`k<7Y$M&(Au5nUJi9d_8jS6xQ6>i_uVk@_WLSUHR{6r^HdVBn zv+iG&po@vJ<)$Tk<~s z&bGz=%i}I;3p8qWqhX*7pGI1|nX;>~v`=GF?3iUXOy@K|zxq?aNW~#oFdA>^U|F^1 zaSK3Yx4_!FWXDhru9C!DjAGzV)tTbBBV~-Mc%0^Ogfz6A89pD}=Q-+R&H2WIHjYkU@L?v+%d|d`zO6o$k@n`9=^}bq+ zmrR5dOj~#3wqkcpcybhKAwFTq&-_%c8dVJVo^qbT>&??3*&9YBMVY=AgI=PFH38nt zSjxG$uc_kA>Cn~QE>h>uy1>G1Olt-NMOhnp83GvO^WG~e*(K#~lN$wDbZOyD)M_xZ zCv*ZjjhD)qftxAv+D1F~bgDdR3QGPFWVkKJWJxyB4hykNKmhXXZr7Pl+{X6l2c)8@skz5)9q9TG@QvbB6C8) z1J?C|w~P{@#D~onyfUI3S=Bv;p+p6m|zunAL`-ROBVx^H8M%YZm=)7`Ni^Y7G81&(G zStpRScHo1Ig2H5pDL~{(it`DcL#qq<3$18{=Q9Zd!xY6et(QwcC&?kq&+hhnW(x^8 z0JPj^t|u&mwHnm9Q>%mn=10>jDaO`zPG}7y3_=SZ#o`Fi(jX{G`Hu-oh~q2O{obVW z{?CBo`5(~Y$P1WXg`eUevbk>msr3I zbB3><#h~LwxxDT)9;A>{ZpUNK0)wbQEq-xbwOM@q+Z;x4R(8StJ1^b)Lz!r6 z4}1a`Vgm6It1^MfUKvVxh%(^iQ_Pe-qd}GMH1CrwS3-fGgPT%FjmFVk|eddq`X z^iqhq=SKcS3H=l0SJDVJ0j`h#z=eUfC89S=UR0da2GN6|x2jy_H|`AYWBG|ycXxui zpGT7NyI;Zs^&BnABykcQrv6O##?9dLB1h{pqc$(}nZs4yl|6qKlf?qdX#ev1MrdmN$nADP zR?q$Hn?G$7J)WuQW8%G#_USQ&g^S3ld1V3?je@i;{1cW0{{C{7DVvBwyLdDoG#GBe zPU=Uc{kcB)KqQxl&r~+4?q?Wz(1+nR6{vZ!J?zy@r7rg#g<>w0M}>po+oZeZOm_Mq-&h>o z>Fd~A{$0t6cp@uIfEEloANo_t?Ghp(;r&TS|CeE45aKVHZZvH}NgSUwnR~zRsfE zJFl)(*Nb^qL9(uVOrb|cvitC;FE67_^=qm`Rb8`zXfs{^Qjua$)fYRKRj(Ip90)ep z5?0zBcD4qt1{inW5XwuaLMdk=uWgR2L^PmIpu3icduOUG^&RYNSCIP`e5q8TmDNZPeqrfOp!2WVnaFlc*ZgSo>A+2jFj_PRVMk1BP$#Z|3U-$l`z-I5u6b(X z)|RVa8%iqnL?JMF30?h!FUcysOQK}OUN-TQnk8^Bd{|Oui)Lk+e4KJX4xqfke?`fs z^*Ru1uj{&j@HY#si$N)$U-POFsNAj51m5CE97LDh_v^#O>Lof~O_tCG-?Ry4=M?36 zp(PeVN8L_7M`(adPw7o?Cv72^fg^UI@ z{<&9jvCJETSfMX;St%LcH4i{+{MJ$Vl)y|SO=7kczDwjNsZ8G|eZlhYulD>FzQm#5 zlg%BEc+=&e-tF@)X16v)?3N%@b3U=aIn71#__{Ok+q~IsyMx*;poY8N;=RB}xT+hf z5W;4W%0t?&hiRfWm$-|YFRmvy=kSv@InBAn2}S#nvRs;nl*DqSxeXI3*Ws>B5^#im zm+)r%BF}P8P_kzMo9Bw#a{BM#-19SN2?E)(YY*ebj z%g2(kH(XC4R`v!`CZ6E5na>m7zhTmJ(F}a=P9dWnDf7!)nZjwdDb)SayvVUM5L**! 
z{XV60;;i(e$zV|D81_R_r)$Ch)v$ftt%P@+cUR%N^9U5n#mjAykQ$sxhiX*7>z9l- z*T+>sQkDSm4vBNdp@tTZqK!EhcHnK!l)EdnuEL6Me`s?p9@Hd2T)&nYR1)UgJ%Z(|bLPJ7Yc!7jO{Qn7WW9n*Y zq2ccO(b4*!ysy+saX8yz$b}1*OxH`v*Y_NQKCKI(0^n7P0*6*H$ei9XdB5J4 z>yZDe`^?srB4e&}k(fPopBd^Dk37w_vJKeY;JvA}-)hHATNIPNPkQYcslQ~?PDtE7 zYQSOn)7}0edkX&Wkgv4a*hY~cV2kwC>6b*S^j>|pkBcTLqU1eGf?V)7QBDGqSk%b(r_We0EfWfF?v;Yo zWwEZjB>p{m6|Wc)Nt>-Tyy?BH6qmmtJ`v0EW2|yx?8gT_3f$#M>So0fMj*3rqsgay z7_X!@h22i#_u$jv{sGXOKHA%G_3q%?>HT2dqHKXyUvX(7=C0BFXe?WrYq(P+k)X(+ zG&bOrCZ9^i9;3P@s5Na;si3?^6q~b9x3OsQfnKBEFjcu~2pTji zd>yJ(<#2H&z`UmC%n&qR3Sf|@+33(Kvp;W%d9I$N5YH0Ii@Wti2j&qK9oSAtt4A%q zqYsK+>E3X6{t#>wkl7wk>~s6&bjT_>H9ollqgTO&yPwAWqgzaBgwgYw2vA6`yrHJw z;}J`bZz}bu)4aaori}=!DB2@bJa)8bcs+8-%1@(Hf|G}F1Dd7#1yFb?`WP)wCz3zX zuugo|La+Tb6rQ*RHyt*gH}-_TrLMtQBBue5)Hkc=Dl=*wYJYW9SW2Rr(Vy0)F;Z(= zG_ud%_cRw(yh#))tasE|G-^A|W2%$O)B$&MoC?|v%-m=HA-JT;b>+fArNuqW=Bbm# zs&5M&M4g)kVna81frD$29gbwv;{(z5@4wCB}u(=zWuK+87%jXG%k9rPnIG+hSkfj)pgy4&zL%9`OhaHcahbj-h_Q4oGhN}}@WpJg#85{!KN;L^ zd!eDsLH$>KMN5*2ASz<=+uRA;R1ix91d(+pCIVMv?yyR4B*TQ;kv$9)>q^mIjf-H_ z5_lssCJV^#0j$WJ>4LJBCxr4N!`AG02|a4e#Q4T|A`jjbHzrLi9H^$5mm!~9L&%iA zA-(^6p*3Us`#MDKfP!^fS#Xm1tHlyIFIS(bv!c>(5~{MF4U{fVMsgkWnkIPxX>y^A~WFc}QI$jNbrNDPiuW;*y%g4Be$F+25hqeWy`A!B(R`9uSs*4;PC(udY zLkVG}J9*A7i!FyHd6QJkUOGiPqG57$Ra=m?%H{HG?)Qek2J^d94U8*;xnn#wfT%of z(Fyz5T>4V7w<&e-IQY=ylz-W?;@Y82RPPh$*Z1OSq++(m!agn%ZdRkIcm9ftnj5t4 zZx#aD&QX5PW^2d8*^M^tZ;l6UjQ`=x_w*>p*iV;hku%Sbkah@R7>w*dnCUlDX6qh{ z9&=_ZW)o&Jd1PoOE;??oR5fz^6l%F^AgdzTMa!NdnR?)BAKXw}+LoTSU(O+ca^DaD zDlZ)>Q#+d@Q6w?CEV^efCZjAmBW>?9Z8@Bv7dFf@?zG5enW)mJakQ!5h?@pSLyx6g zPuXyBN!%2rRY(^FR&MD4iRSut?;~i`HZS29#>M!Sv7xhR^IOEHf`TixhCwtscM4t7 zj$tGEj3g1!^&g{=<=`BjF{C%3s3EG0>wO0R_neRP%DqJ0=kNvx z6*=Z%J&HTDB(r9Ekm(-*FyP=jjSRJP~;Q@Z{ACY9TYvDv>D-6x-9M3=a;3BH+h z$II!Z6?hrUPX?CUl-plj{ZldlHuk@$m3(J z!|lFxKTd^&{i>zIEi~V6qeyqzbG1EK78a$TD)mkHH|T2HhIkIFuw(A08PygQmRl~Z zi{wm3y`O%^ATezK0V*NEw~u{5GkH^=tE12@!ZK*7Wsnpz6 za&O~Fc}?WLvF%a$%11+MGlDeDt9!uMe_?&TL@-yBfeFOs|I2GpVR1HZ^sI7ZT0EZ; z!G7(-;}WzTMW?X@(ea3gGfIoyyuO^#j2)?c49wP;(&BK^ulz0)Kk2XlxnE6HcskWH zhSI&zIuVzJIVKhU_5`g%^R4jy zIz~KNI^p9P^CT9_N}KRKla&q_AVB*2$V0Oksic&zXklVP29(;lh`%nQA*6dhB~?<3 zNY4ktRzIV*hOQaNAJv<{V(I4&my@au4&$FoY^B0Dx4}^Ms{-t%R$aY)s?Ha0kZp`` z^82Y89ko}f(5ATAb+znzMv>X{iKlhQotR7U#FFZf*Xd_kV3pawdq;=rg3au2@rf&b zc-e3|+GWO{19BQ`!KbR)C@PqBt=LpV1KbP9?N;;|bm}KBX8jThFfb$2QPAe}sN}ex zE=9Qdo_x}E7*!Y+(@#?(U2^SPFbal%*#5=b)VdtKQuvncMF5ny&Bd6!jzr+ z@VIo6%zBkc#QMrB$x|a5NVEs)tCp{{gK8y{g*ml`B@aNjCIo@cWkRr-HrWj&?6vdc z<)Oj!c(D2R(2-eo1}~b$b?fVf=yoY;x`z;f6n_i;{O0*NBpG# From b8b585c66bed633007f9e4d572bf851e8a69b58f Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Fri, 31 Mar 2023 10:25:47 +1100 Subject: [PATCH 14/26] refactored network creation script _03_create_network_resources.py, abstracting the core functions (configure OSMnx, generate network, clean intersections, create pgrouting topology); this will make it easier to investigate for discussion #235 --- .../_03_create_network_resources.py | 264 +++++++++--------- process/subprocesses/script_running_log.py | 18 +- 2 files changed, 153 insertions(+), 129 deletions(-) diff --git a/process/subprocesses/_03_create_network_resources.py b/process/subprocesses/_03_create_network_resources.py index 203f689c..6fd47958 100644 --- a/process/subprocesses/_03_create_network_resources.py +++ b/process/subprocesses/_03_create_network_resources.py @@ -23,21 +23,78 @@ from tqdm import tqdm -def gdf_to_postgis_format(gdf, engine, table, rename_geometry='geom'): - """Sets geometry with optional new name (e.g. 
'geom') and writes to PostGIS, returning the reformatted GeoDataFrame.""" - gdf.columns = [ - rename_geometry if x == 'geometry' else x for x in gdf.columns - ] - gdf = gdf.set_geometry(rename_geometry) - with engine.connect() as connection: - gdf.to_postgis( - table, connection, index=True, +def osmnx_configuration(region_config, network): + """Set up OSMnx for network retrieval and analysis, including check of configuration.""" + ox.settings.use_cache = True + ox.settings.log_console = True + # set OSMnx to retrieve filtered network to match OpenStreetMap publication date + osm_publication_date = f"""[date:"{datetime.strptime(str(region_config['OpenStreetMap']['publication_date']), '%Y%m%d').strftime('%Y-%m-%d')}T00:00:00Z"]""" + ox.settings.overpass_settings = ( + '[out:json][timeout:{timeout}]' + osm_publication_date + '{maxsize}' + ) + if not network['osmnx_retain_all']: + print( + """Note: "osmnx_retain_all = False" ie. only main network segment is retained. Please ensure this is appropriate for your study region (ie. networks on real islands may be excluded).""", + ) + elif network['osmnx_retain_all']: + print( + """Note: "osmnx_retain_all = True" ie. all network segments will be retained. Please ensure this is appropriate for your study region (ie. networks on real islands will be included, however network artifacts resulting in isolated network segments, or network islands, may also exist. These could be problematic if sample points are snapped to erroneous, mal-connected segments. Check results.).""", + ) + else: + sys.exit( + "Please ensure the osmnx_retain_all has been defined for this region with values of either 'False' or 'True'", + ) + + +def generate_pedestrian_network(engine, network, network_study_region, crs): + """Generate pedestrian network using OSMnx and store in a PostGIS database, or otherwise retrieve it.""" + if db_contents.has_table('nodes') and db_contents.has_table('edges'): + print( + f'Network "pedestrian" for {network_study_region} has already been processed.', + ) + print('\nLoading the pedestrian network.') + with engine.connect() as connection: + nodes = gpd.read_postgis('nodes', connection, index_col='osmid') + with engine.connect() as connection: + edges = gpd.read_postgis( + 'edges', connection, index_col=['u', 'v', 'key'], + ) + G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None) + else: + G = derive_pedestrian_network( + engine, + network_study_region, + network['osmnx_retain_all'], + network['polygon_iteration'], + ) + print(' - Remove unnecessary key data from edges') + att_list = { + k + for n in G.edges + for k in G.edges[n].keys() + if k not in ['osmid', 'length'] + } + capture_output = [ + [d.pop(att, None) for att in att_list] + for n1, n2, d in tqdm(G.edges(data=True), desc=' ' * 18) + ] + del capture_output + print(' - Project graph') + G_proj = ox.project_graph(G, to_crs=crs['srid']) + if G_proj.is_directed(): + G_proj = G_proj.to_undirected() + print( + ' - Save projected graph edges and node GeoDataFrames to PostGIS', ) + nodes, edges = ox.graph_to_gdfs(G_proj) + gdf_to_postgis_format(nodes, engine, 'nodes') + gdf_to_postgis_format(edges, engine, 'edges') -def derive_routable_network( +def derive_pedestrian_network( engine, network_study_region, osmnx_retain_all, network_polygon_iteration, ): + """Derive routable pedestrian network using OSMnx.""" print( 'Creating and saving pedestrian roads network... 
', end='', flush=True, ) @@ -98,130 +155,51 @@ def derive_routable_network( return G -def main(): - # simple timer for log file - start = time.time() - script = os.path.basename(sys.argv[0]) - task = 'Create network resources' - engine = create_engine( - f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', - future=True, - pool_pre_ping=True, - connect_args={ - 'keepalives': 1, - 'keepalives_idle': 30, - 'keepalives_interval': 10, - 'keepalives_count': 5, - }, - ) - db_contents = inspect(engine) - if network['buffered_region']: - network_study_region = buffered_urban_study_region - else: - network_study_region = study_region +def gdf_to_postgis_format(gdf, engine, table, rename_geometry='geom'): + """Sets geometry with optional new name (e.g. 'geom') and writes to PostGIS, returning the reformatted GeoDataFrame.""" + gdf.columns = [ + rename_geometry if x == 'geometry' else x for x in gdf.columns + ] + gdf = gdf.set_geometry(rename_geometry) + with engine.connect() as connection: + gdf.to_postgis( + table, connection, index=True, + ) - if not ( - db_contents.has_table('edges') - and db_contents.has_table('nodes') - and db_contents.has_table(intersections_table) - ): - print('\nGet networks and save as graphs.') - ox.settings.use_cache = True - ox.settings.log_console = True - # set OSMnx to retrieve filtered network to match OpenStreetMap publication date - osm_publication_date = f"""[date:"{datetime.strptime(str(region_config['OpenStreetMap']['publication_date']), '%Y%m%d').strftime('%Y-%m-%d')}T00:00:00Z"]""" - ox.settings.overpass_settings = ( - '[out:json][timeout:{timeout}]' - + osm_publication_date - + '{maxsize}' + +def clean_intersections(engine, G_proj, network, intersections_table): + """Generate cleaned intersections using OSMnx and store in postgis database, or otherwise retrieve them.""" + db_contents = inspect(engine) + if not db_contents.has_table(intersections_table): + ## Copy clean intersections to postgis + print('\nPrepare and copy clean intersections to postgis... ') + # Clean intersections + intersections = ox.consolidate_intersections( + G_proj, + tolerance=network['intersection_tolerance'], + rebuild_graph=False, + dead_ends=False, ) - if not network['osmnx_retain_all']: - print( - """Note: "osmnx_retain_all = False" ie. only main network segment is retained. Please ensure this is appropriate for your study region (ie. networks on real islands may be excluded).""", - ) - elif network['osmnx_retain_all']: - print( - """Note: "osmnx_retain_all = True" ie. all network segments will be retained. Please ensure this is appropriate for your study region (ie. networks on real islands will be included, however network artifacts resulting in isolated network segments, or network islands, may also exist. These could be problematic if sample points are snapped to erroneous, mal-connected segments. 
Check results.).""", - ) - else: - sys.exit( - "Please ensure the osmnx_retain_all has been defined for this region with values of either 'False' or 'True'", - ) - if db_contents.has_table('nodes') and db_contents.has_table('edges'): - print( - f'Network "pedestrian" for {network_study_region} has already been processed.', - ) - print('\nLoading the pedestrian network.') - with engine.connect() as connection: - nodes = gpd.read_postgis( - 'nodes', connection, index_col='osmid', - ) - with engine.connect() as connection: - edges = gpd.read_postgis( - 'edges', connection, index_col=['u', 'v', 'key'], - ) - G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None) - else: - G = derive_routable_network( - engine, - network_study_region, - network['osmnx_retain_all'], - network['polygon_iteration'], - ) - print(' - Remove unnecessary key data from edges') - att_list = { - k - for n in G.edges - for k in G.edges[n].keys() - if k not in ['osmid', 'length'] - } - capture_output = [ - [d.pop(att, None) for att in att_list] - for n1, n2, d in tqdm(G.edges(data=True), desc=' ' * 18) - ] - del capture_output - print(' - Project graph') - G_proj = ox.project_graph(G, to_crs=crs['srid']) - if G_proj.is_directed(): - G_proj = G_proj.to_undirected() - print( - ' - Save projected graph edges and node GeoDataFrames to PostGIS', - ) - nodes, edges = ox.graph_to_gdfs(G_proj) - gdf_to_postgis_format(nodes, engine, 'nodes') - gdf_to_postgis_format(edges, engine, 'edges') - if not db_contents.has_table(intersections_table): - ## Copy clean intersections to postgis - print('\nPrepare and copy clean intersections to postgis... ') - # Clean intersections - intersections = ox.consolidate_intersections( - G_proj, - tolerance=network['intersection_tolerance'], - rebuild_graph=False, - dead_ends=False, + intersections = gpd.GeoDataFrame( + intersections, columns=['geom'], + ).set_geometry('geom') + with engine.connect() as connection: + intersections.to_postgis( + intersections_table, connection, index=True, ) - intersections = gpd.GeoDataFrame( - intersections, columns=['geom'], - ).set_geometry('geom') - with engine.connect() as connection: - intersections.to_postgis( - intersections_table, connection, index=True, - ) - print(' - Done.') + print(' - Done.') - else: - print( - ' - It appears that clean intersection data has already been prepared and imported for this region.', - ) else: print( - '\nIt appears that edges and nodes have already been prepared and imported for this region.', + ' - It appears that clean intersection data has already been prepared and imported for this region.', ) + +def create_pgrouting_network_topology(engine): + """Create network topology for pgrouting for later analysis of node relations for sample points.""" sql = """SELECT 1 WHERE to_regclass('public.edges_target_idx') IS NOT NULL;""" with engine.begin() as connection: res = connection.execute(text(sql)).first() - if res is None: print('\nCreate network topology...') sql = """ @@ -236,9 +214,7 @@ def main(): """ with engine.begin() as connection: min_id, max_id = connection.execute(text(sql)).first() - print(f'there are {max_id - min_id + 1} edges to be processed') - interval = 10000 for x in range(min_id, max_id + 1, interval): sql = f"select pgr_createTopology('edges', 1, 'geom', 'ogc_fid', rows_where:='ogc_fid>={x} and ogc_fid<{x+interval}');" @@ -260,6 +236,44 @@ def main(): ' - It appears that the routable pedestrian network has already been set up for use by pgRouting.', ) + +def main(): + # simple timer for log file + start = 
time.time() + script = os.path.basename(sys.argv[0]) + task = 'Create network resources' + engine = create_engine( + f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', + future=True, + pool_pre_ping=True, + connect_args={ + 'keepalives': 1, + 'keepalives_idle': 30, + 'keepalives_interval': 10, + 'keepalives_count': 5, + }, + ) + db_contents = inspect(engine) + if network['buffered_region']: + network_study_region = buffered_urban_study_region + else: + network_study_region = study_region + + if not ( + db_contents.has_table('edges') + and db_contents.has_table('nodes') + and db_contents.has_table(intersections_table) + ): + osmnx_configuration(region_config, network) + generate_pedestrian_network(engine, network, network_study_region, crs) + clean_intersections(engine, G_proj, network, intersections_table) + else: + print( + '\nIt appears that edges and nodes have already been prepared and imported for this region.', + ) + + create_pgrouting_network_topology(engine) + # ensure user is granted access to the newly created tables with engine.begin() as connection: connection.execute(text(grant_query)) diff --git a/process/subprocesses/script_running_log.py b/process/subprocesses/script_running_log.py index 757c0d77..0c39a5c8 100644 --- a/process/subprocesses/script_running_log.py +++ b/process/subprocesses/script_running_log.py @@ -12,17 +12,27 @@ import psycopg2 # Import custom variables for National Liveability indicator process -from _project_setup import db, db_host, db_port, db_pwd, db_user +from _project_setup import ( + analysis_timezone, + db, + db_host, + db_port, + db_pwd, + db_user, +) + +os.environ['TZ'] = analysis_timezone +time.tzset() -# Define script logging to study region database function def script_running_log(script='', task='', start='', prefix=''): + """Define script logging to study region database function.""" # Initialise postgresql connection conn = psycopg2.connect( dbname=db, user=db_user, password=db_pwd, host=db_host, port=db_port, ) curs = conn.cursor() - date_time = time.strftime('%Y%m%d-%H%M%S') + date_time = time.strftime('%Y-%m-%d_%H%M') duration = (time.time() - start) / 60 log_table = """ @@ -43,7 +53,7 @@ def script_running_log(script='', task='', start='', prefix=''): curs.execute(log_table) conn.commit() print( - """Processing completed at {}\n- Task: {}\n- Duration: {:04.2f} minutes\n""".format( + """\nProcessing completed at {}\n- Task: {}\n- Duration: {:04.2f} minutes\n""".format( date_time, task, duration, ), ) From 6baac3dbd0b865450b2c1d79abf5aa560b3ceb6d Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Fri, 31 Mar 2023 10:31:21 +1100 Subject: [PATCH 15/26] fixed up some omissions in the just-now refactored network creation script --- process/subprocesses/_03_create_network_resources.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/process/subprocesses/_03_create_network_resources.py b/process/subprocesses/_03_create_network_resources.py index 6fd47958..868e0884 100644 --- a/process/subprocesses/_03_create_network_resources.py +++ b/process/subprocesses/_03_create_network_resources.py @@ -48,6 +48,7 @@ def osmnx_configuration(region_config, network): def generate_pedestrian_network(engine, network, network_study_region, crs): """Generate pedestrian network using OSMnx and store in a PostGIS database, or otherwise retrieve it.""" + db_contents = inspect(engine) if db_contents.has_table('nodes') and db_contents.has_table('edges'): print( f'Network "pedestrian" for {network_study_region} has already been processed.', @@ -60,6 
+61,7 @@ def generate_pedestrian_network(engine, network, network_study_region, crs): 'edges', connection, index_col=['u', 'v', 'key'], ) G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None) + return G_proj else: G = derive_pedestrian_network( engine, @@ -89,6 +91,7 @@ def generate_pedestrian_network(engine, network, network_study_region, crs): nodes, edges = ox.graph_to_gdfs(G_proj) gdf_to_postgis_format(nodes, engine, 'nodes') gdf_to_postgis_format(edges, engine, 'edges') + return G_proj def derive_pedestrian_network( @@ -265,7 +268,9 @@ def main(): and db_contents.has_table(intersections_table) ): osmnx_configuration(region_config, network) - generate_pedestrian_network(engine, network, network_study_region, crs) + G_proj = generate_pedestrian_network( + engine, network, network_study_region, crs, + ) clean_intersections(engine, G_proj, network, intersections_table) else: print( From 4af28be47537e6e8a3c7135695a3c6c5b7d6d094 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Fri, 31 Mar 2023 12:27:00 +1100 Subject: [PATCH 16/26] retained edges with full geometry for pgrouting topological analysis --- .../_03_create_network_resources.py | 38 +++++++++++++++++-- .../_11_neighbourhood_analysis.py | 6 ++- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/process/subprocesses/_03_create_network_resources.py b/process/subprocesses/_03_create_network_resources.py index 868e0884..52774f8d 100644 --- a/process/subprocesses/_03_create_network_resources.py +++ b/process/subprocesses/_03_create_network_resources.py @@ -69,6 +69,10 @@ def generate_pedestrian_network(engine, network, network_study_region, crs): network['osmnx_retain_all'], network['polygon_iteration'], ) + print( + ' - Save edges with geometry to postgis prior to simplification', + ) + graph_to_postgis(G, engine, 'edges', nodes=False) print(' - Remove unnecessary key data from edges') att_list = { k @@ -86,11 +90,15 @@ def generate_pedestrian_network(engine, network, network_study_region, crs): if G_proj.is_directed(): G_proj = G_proj.to_undirected() print( - ' - Save projected graph edges and node GeoDataFrames to PostGIS', + ' - Save simplified, projected, undirected graph edges and node GeoDataFrames to PostGIS', + ) + graph_to_postgis( + G, + engine, + nodes_table='nodes_simplified', + edges_table='edges_simplified', + nodes=False, ) - nodes, edges = ox.graph_to_gdfs(G_proj) - gdf_to_postgis_format(nodes, engine, 'nodes') - gdf_to_postgis_format(edges, engine, 'edges') return G_proj @@ -154,10 +162,32 @@ def derive_pedestrian_network( # induce a subgraph on those nodes G = nx.MultiDiGraph(G.subgraph(nodes)) + G = G.to_undirected() print('Done.') return G +def graph_to_postgis( + G, + engine, + nodes_table='nodes', + edges_table='edges', + nodes=True, + edges=True, +): + """Save graph nodes and/or edges to postgis database.""" + if nodes is True and edges is False: + nodes = ox.graph_to_gdfs(G, edges=False) + gdf_to_postgis_format(nodes, engine, nodes_table) + if edges is True and nodes is False: + edges = ox.graph_to_gdfs(G, nodes=False) + gdf_to_postgis_format(edges, engine, edges_table) + else: + nodes, edges = ox.graph_to_gdfs(G) + gdf_to_postgis_format(nodes, engine, nodes_table) + gdf_to_postgis_format(edges, engine, edges_table) + + def gdf_to_postgis_format(gdf, engine, table, rename_geometry='geom'): """Sets geometry with optional new name (e.g. 
'geom') and writes to PostGIS, returning the reformatted GeoDataFrame.""" gdf.columns = [ diff --git a/process/subprocesses/_11_neighbourhood_analysis.py b/process/subprocesses/_11_neighbourhood_analysis.py index 0c386a8a..ce91ca9e 100644 --- a/process/subprocesses/_11_neighbourhood_analysis.py +++ b/process/subprocesses/_11_neighbourhood_analysis.py @@ -57,10 +57,12 @@ def main(): ) db_contents = inspect(engine) with engine.connect() as connection: - nodes = gpd.read_postgis('nodes', connection, index_col='osmid') + nodes = gpd.read_postgis( + 'nodes_simplified', connection, index_col='osmid', + ) with engine.connect() as connection: edges = gpd.read_postgis( - 'edges', connection, index_col=['u', 'v', 'key'], + 'edges_simplified', connection, index_col=['u', 'v', 'key'], ) G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None) with engine.connect() as connection: From dc1b47d0cf0824800a8909177488ba44f523b2d6 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Fri, 31 Mar 2023 13:04:02 +1100 Subject: [PATCH 17/26] retained edges with full geometry for pgrouting topological analysis (corrected implementation of this) --- .../_03_create_network_resources.py | 36 +++++++++++-------- .../_11_neighbourhood_analysis.py | 4 +-- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/process/subprocesses/_03_create_network_resources.py b/process/subprocesses/_03_create_network_resources.py index 52774f8d..fa2d0601 100644 --- a/process/subprocesses/_03_create_network_resources.py +++ b/process/subprocesses/_03_create_network_resources.py @@ -46,7 +46,9 @@ def osmnx_configuration(region_config, network): ) -def generate_pedestrian_network(engine, network, network_study_region, crs): +def generate_pedestrian_network_nodes_edges( + engine, network, network_study_region, crs, +): """Generate pedestrian network using OSMnx and store in a PostGIS database, or otherwise retrieve it.""" db_contents = inspect(engine) if db_contents.has_table('nodes') and db_contents.has_table('edges'): @@ -72,7 +74,9 @@ def generate_pedestrian_network(engine, network, network_study_region, crs): print( ' - Save edges with geometry to postgis prior to simplification', ) - graph_to_postgis(G, engine, 'edges', nodes=False) + graph_to_postgis( + G, engine, 'edges', nodes=False, geometry_name='geom_4326', + ) print(' - Remove unnecessary key data from edges') att_list = { k @@ -93,11 +97,10 @@ def generate_pedestrian_network(engine, network, network_study_region, crs): ' - Save simplified, projected, undirected graph edges and node GeoDataFrames to PostGIS', ) graph_to_postgis( - G, + G_proj, engine, - nodes_table='nodes_simplified', + nodes_table='nodes', edges_table='edges_simplified', - nodes=False, ) return G_proj @@ -174,29 +177,30 @@ def graph_to_postgis( edges_table='edges', nodes=True, edges=True, + geometry_name='geom', ): """Save graph nodes and/or edges to postgis database.""" if nodes is True and edges is False: nodes = ox.graph_to_gdfs(G, edges=False) - gdf_to_postgis_format(nodes, engine, nodes_table) + gdf_to_postgis_format(nodes, engine, nodes_table, geometry_name) if edges is True and nodes is False: edges = ox.graph_to_gdfs(G, nodes=False) - gdf_to_postgis_format(edges, engine, edges_table) + gdf_to_postgis_format(edges, engine, edges_table, geometry_name) else: nodes, edges = ox.graph_to_gdfs(G) - gdf_to_postgis_format(nodes, engine, nodes_table) - gdf_to_postgis_format(edges, engine, edges_table) + gdf_to_postgis_format(nodes, engine, nodes_table, geometry_name) + gdf_to_postgis_format(edges, 
engine, edges_table, geometry_name) -def gdf_to_postgis_format(gdf, engine, table, rename_geometry='geom'): +def gdf_to_postgis_format(gdf, engine, table, geometry_name='geom'): """Sets geometry with optional new name (e.g. 'geom') and writes to PostGIS, returning the reformatted GeoDataFrame.""" gdf.columns = [ - rename_geometry if x == 'geometry' else x for x in gdf.columns + geometry_name if x == 'geometry' else x for x in gdf.columns ] - gdf = gdf.set_geometry(rename_geometry) + gdf = gdf.set_geometry(geometry_name) with engine.connect() as connection: gdf.to_postgis( - table, connection, index=True, + table, connection, index=True, if_exists='replace', ) @@ -235,7 +239,9 @@ def create_pgrouting_network_topology(engine): res = connection.execute(text(sql)).first() if res is None: print('\nCreate network topology...') - sql = """ + sql = f""" + ALTER TABLE edges ADD COLUMN IF NOT EXISTS "geom" geometry; + UPDATE edges SET geom = ST_Transform(geom_4326, {crs['srid']}); ALTER TABLE edges ADD COLUMN IF NOT EXISTS "from" bigint; ALTER TABLE edges ADD COLUMN IF NOT EXISTS "to" bigint; UPDATE edges SET "from" = v, "to" = u WHERE key != 2; @@ -298,7 +304,7 @@ def main(): and db_contents.has_table(intersections_table) ): osmnx_configuration(region_config, network) - G_proj = generate_pedestrian_network( + G_proj = generate_pedestrian_network_nodes_edges( engine, network, network_study_region, crs, ) clean_intersections(engine, G_proj, network, intersections_table) diff --git a/process/subprocesses/_11_neighbourhood_analysis.py b/process/subprocesses/_11_neighbourhood_analysis.py index ce91ca9e..6499b979 100644 --- a/process/subprocesses/_11_neighbourhood_analysis.py +++ b/process/subprocesses/_11_neighbourhood_analysis.py @@ -57,9 +57,7 @@ def main(): ) db_contents = inspect(engine) with engine.connect() as connection: - nodes = gpd.read_postgis( - 'nodes_simplified', connection, index_col='osmid', - ) + nodes = gpd.read_postgis('nodes', connection, index_col='osmid') with engine.connect() as connection: edges = gpd.read_postgis( 'edges_simplified', connection, index_col=['u', 'v', 'key'], From c55990ffdae223ddf4e5f8b93985b530205ba794 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Mon, 3 Apr 2023 15:32:24 +1000 Subject: [PATCH 18/26] Created methods for generating metadata, as per #230 --- process/3_generate_resources.py | 13 +- .../assets/metadata_template.yml | 233 ++++++++++++++++++ process/configuration/templates/config.yml | 14 +- .../subprocesses/_01_create_study_region.py | 36 ++- process/subprocesses/_report_functions.py | 8 + 5 files changed, 294 insertions(+), 10 deletions(-) create mode 100644 process/configuration/assets/metadata_template.yml diff --git a/process/3_generate_resources.py b/process/3_generate_resources.py index 42101664..940e8acb 100644 --- a/process/3_generate_resources.py +++ b/process/3_generate_resources.py @@ -1,8 +1,4 @@ -""" -Global scorecards. - -Format and save indicator reports. 
-""" +"""Generate resources supporting urban indicator dissemination and usage.""" import os import shutil @@ -24,6 +20,7 @@ # import and set up functions from subprocesses._report_functions import ( + generate_metadata, generate_report_for_language, get_and_setup_language_cities, postgis_to_csv, @@ -105,6 +102,12 @@ def main(): f"{config.region['region_dir']}/{file}", ) print(f' {file}') + + # Generate metadata + print('\nMetadata') + metadata_xml = generate_metadata(config.region['region_dir'], codename) + print(f' {metadata_xml}') + # Generate reports languages = get_and_setup_language_cities(config) if languages == []: diff --git a/process/configuration/assets/metadata_template.yml b/process/configuration/assets/metadata_template.yml new file mode 100644 index 00000000..d642b12c --- /dev/null +++ b/process/configuration/assets/metadata_template.yml @@ -0,0 +1,233 @@ +mcf: + version: 1.0 + +metadata: + identifier: + language: en + charset: utf8 + hierarchylevel: dataset + datestamp: {datestamp} + dataseturi: + +spatial: + datatype: vector + geomtype: polygon + +identification: + language: eng + charset: utf8 + title: + en: Spatial urban indicator data for {name} + abstract: + en: A set of spatial urban indicators for {name}, derived using the Global Healthy & Sustainable City Indicators tool (https://global-healthy-liveable-cities.github.io/). + dates: + creation: {datestamp} + keywords: + default: + keywords: + en: [urban planning, built environment, health, policy indicators, spatial indicators] + topiccategory: + - Urban analysis and development + extents: + spatial: + - bbox: {spatial_bbox} + crs: {spatial_crs} + temporal: + - begin: {year} + end: {year} + resolution: P1Y + license: + name: ODbL + url: https://opendatacommons.org/licenses/odbl/1-0/ + rights: + en: Copyright (c) {dateyear} + url: https://healthysustainablecities.org + +contact: + pointOfContact: &contact_poc + organization: {authors} + url: {url} + individualname: {individualname} + positionname: {positionname} + email: {email} + contactinstructions: email + + distributor: *contact_poc + +distribution: + GOHSC: + url: https://healthysustainablecities.org + +acquisition: + note: see statement + +dataquality: + scope: + level: dataset + lineage: + statement: | + This dataset was derived using the Global Healthy & Sustainable City Indicators tool (https://global-healthy-liveable-cities.github.io/) developed to support the Global Observatory of Healthy and Sustainable Cities (https://healthysustainablcities.org)/. 
The analysis was configured as follows:\n\n{region_config}
+
+attributes:
+    - Category: Study region information
+      Description: Continent
+      Scale: city
+      Variable: Continent
+    - Category: Study region information
+      Description: Country
+      Scale: city
+      Variable: Country
+    - Category: Study region information
+      Description: 2-letter country code
+      Scale: city
+      Variable: ISO 3166-1 alpha-2
+    - Category: Study region information
+      Description: Study region
+      Scale: 'city grid'
+      Variable: City
+    - Category: Derived study region statistics
+      Description: "Area (km\xB2; accounting for urban restrictions if applied)"
+      Scale: 'city grid'
+      Variable: Area (sqkm)
+    - Category: Derived study region statistics
+      Description: 'Population estimate
+        as per configured population data source'
+      Scale: 'city grid'
+      Variable: Population estimate
+    - Category: Derived study region statistics
+      Description: "Population per km\xB2"
+      Scale: 'city grid'
+      Variable: Population per sqkm
+    - Category: Derived study region statistics
+      Description: Intersection count (following consolidation based on intersection tolerance parameter in region configuration)
+      Scale: 'city grid'
+      Variable: Intersections
+    - Category: Derived study region statistics
+      Description: "Intersections per km\xB2"
+      Scale: 'city grid'
+      Variable: Intersections per sqkm
+    - Category: Linked covariates
+      Description: 'Total emission of CO 2 from the transport sector, using non-short-cycle-organic fuels in 2015'
+      Scale: city
+      Variable: E_EC2E_T15
+    - Category: Linked covariates
+      Description: 'Total emission of CO 2 from the energy sector
+        using short-cycle-organic fuels in 2015'
+      Scale: city
+      Variable: E_EC2O_T15
+    - Category: Linked covariates
+      Description: Total emission of PM 2.5 from the transport sector in 2015
+      Scale: city
+      Variable: E_EPM2_T15
+    - Category: Linked covariates
+      Description: Total concentration of PM 2.5 for reference epoch 2014
+      Scale: city
+      Variable: E_CPM2_T14
+    - Category: Analytical statistic
+      Description: Sample points used in this analysis (generated along pedestrian network for populated grid areas)
+      Scale: 'city grid'
+      Variable: urban_sample_point_count
+    - Category: Indicator estimates
+      Description: 'Score (/1) for access within 500 m to a fresh food market / supermarket (source: OpenStreetMap or custom)'
+      Scale: grid
+      Variable: access_500m_fresh_food_market_score
+    - Category: Indicator estimates
+      Description: 'Score (/1) for access within 500 m to a convenience store (source: OpenStreetMap or custom)'
+      Scale: grid
+      Variable: access_500m_convenience_score
+    - Category: Indicator estimates
+      Description: 'Score (/1) for access within 500 m to public transport (source: OpenStreetMap or custom)'
+      Scale: grid
+      Variable: access_500m_pt_osm_any_score
+    - Category: Indicator estimates
+      Description: 'Score (/1) for access within 500 m to any public open space (source: OpenStreetMap)'
+      Scale: grid
+      Variable: access_500m_public_open_space_any_score
+    - Category: Indicator estimates
+      Description: 'Score (/1) for access within 500 m to a public open space larger than 1.5 hectares (source: OpenStreetMap)'
+      Scale: grid
+      Variable: access_500m_public_open_space_large_score
+    - Category: Indicator estimates
+      Description: 'Score (/1) for access within 500 m to public transport (source: GTFS)'
+      Scale: grid
+      Variable: access_500m_pt_gtfs_any_score
+    - Category: Indicator estimates
+      Description: 'Score (/1) for access within 500 m to public transport with average daytime weekday service frequency of 30 minutes or better (source: GTFS)'
+      Scale: grid
+      Variable: access_500m_pt_gtfs_freq_30_score
+    - Category: Indicator estimates
+      Description: 'Score (/1) for access within 500 m to public transport with average daytime weekday service frequency of 20 minutes or better (source: GTFS)'
+      Scale: grid
+      Variable: access_500m_pt_gtfs_freq_20_score
+    - Category: Indicator estimates
+      Description: 'Score (/1) for access within 500 m to any public transport stop (source: GTFS or OpenStreetMap/custom)'
+      Scale: grid
+      Variable: access_500m_pt_any_score
+    - Category: Indicator estimates
+      Description: 'Percentage of population with access within 500 m to a fresh food market / supermarket (source: OpenStreetMap or custom)'
+      Scale: city
+      Variable: pop_pct_access_500m_fresh_food_market_score
+    - Category: Indicator estimates
+      Description: 'Percentage of population with access within 500 m to a convenience store (source: OpenStreetMap or custom)'
+      Scale: city
+      Variable: pop_pct_access_500m_convenience_score
+    - Category: Indicator estimates
+      Description: 'Percentage of population with access within 500 m to public transport (source: OpenStreetMap or custom)'
+      Scale: city
+      Variable: pop_pct_access_500m_pt_osm_any_score
+    - Category: Indicator estimates
+      Description: 'Percentage of population with access within 500 m to any public open space (source: OpenStreetMap)'
+      Scale: city
+      Variable: pop_pct_access_500m_public_open_space_any_score
+    - Category: Indicator estimates
+      Description: 'Percentage of population with access within 500 m to a public open space larger than 1.5 hectares (source: OpenStreetMap)'
+      Scale: city
+      Variable: pop_pct_access_500m_public_open_space_large_score
+    - Category: Indicator estimates
+      Description: 'Percentage of population with access within 500 m to public transport (source: GTFS)'
+      Scale: city
+      Variable: pop_pct_access_500m_pt_gtfs_any_score
+    - Category: Indicator estimates
+      Description: 'Percentage of population with access within 500 m to public transport with average daytime weekday service frequency of 30 minutes or better (source: GTFS)'
+      Scale: city
+      Variable: pop_pct_access_500m_pt_gtfs_freq_30_score
+    - Category: Indicator estimates
+      Description: 'Percentage of population with access within 500 m to public transport with average daytime weekday service frequency of 20 minutes or better (source: GTFS)'
+      Scale: city
+      Variable: pop_pct_access_500m_pt_gtfs_freq_20_score
+    - Category: Indicator estimates
+      Description: 'Percentage of population with access within 500 m to any public transport stop (source: GTFS or OpenStreetMap/custom)'
+      Scale: city
+      Variable: pop_pct_access_500m_pt_any_score
+    - Category: Indicator estimates
+      Description: Average walkable neighbourhood population density (population weighted) *
+      Scale: city
+      Variable: pop_nh_pop_density
+    - Category: Indicator estimates
+      Description: Average walkable neighbourhood intersection density (population weighted) *
+      Scale: city
+      Variable: pop_nh_intersection_density
+    - Category: Indicator estimates
+      Description: Average daily living score (population weighted) *
+      Scale: city
+      Variable: pop_daily_living
+    - Category: Indicator estimates
+      Description: Average walkability (population weighted) *
+      Scale: city
+      Variable: pop_walkability
+    - Category: Indicator estimates
+      Description: 'Average walkable neighbourhood population density'
+      Scale: 'city grid'
+      Variable: local_nh_population_density
+    - Category: Indicator estimates
+      Description: 
'Average walkable neighbourhood intersection density' + Scale: 'city grid' + Variable: local_nh_intersection_density + - Category: Indicator estimates + Description: 'Average daily living score ' + Scale: 'city grid' + Variable: local_daily_living + - Category: Indicator estimates + Description: 'Average walkability ' + Scale: 'city grid' + Variable: local_walkability diff --git a/process/configuration/templates/config.yml b/process/configuration/templates/config.yml index dddd3ccd..12fa9bb8 100644 --- a/process/configuration/templates/config.yml +++ b/process/configuration/templates/config.yml @@ -41,9 +41,17 @@ network_analysis: # For scaling binary cutoffs using a smooth transition; this parameter adjusts slope k of the transition documentation: authors: Global Healthy Liveable Cities Indicator Study Collaboration - # Authors of project - version: 2.0 - # Version of documentation + # Authors of project (for metadata) + url: + # Your URL (optional; for metadata) + individualname: + # Name of contact person for analysis (for metadata) + positionname: + # Position of contact person conducting analysis (for metadata) + email: + # Contact e-mail (for metadata) + version: 1.0 + # Edition of project (for metadata) points_of_interest: destination_list: [supermarket_osm, bakery_osm, meat_seafood_osm, fruit_veg_osm, deli_osm, convenience_osm, petrolstation_osm, newsagent_osm, market_osm, pt_any] osm_destination_definitions: process/configuration/osm_destination_definitions.csv diff --git a/process/subprocesses/_01_create_study_region.py b/process/subprocesses/_01_create_study_region.py index 62bbbf39..0f80d49b 100644 --- a/process/subprocesses/_01_create_study_region.py +++ b/process/subprocesses/_01_create_study_region.py @@ -190,12 +190,44 @@ def main(): and db_contents.has_table('urban_study_region') and db_contents.has_table(buffered_urban_study_region) ): - return f"""Study region boundaries have previously been created (study_region_boundary, urban_region, urban_study_region and {buffered_urban_study_region}). If you wish to recreate these, please manually drop them (e.g. using psql) or optionally drop the {db} database and start again (e.g. using the subprocesses/_drop_study_region_database.py utility script.\n""" + print('Updating project metadata:', end='', flush=True) + sql = """SELECT ST_Extent(ST_Transform(geom,4326)) FROM urban_study_region;""" + with engine.begin() as connection: + bbox = ( + connection.execute(text(sql)) + .fetchone()[0] + .replace(' ', ',') + .replace('(', '[') + .replace(')', ']')[3:] + ) + + yml = ( + f'{folder_path}/process/configuration/assets/metadata_template.yml' + ) + datestamp = time.strftime('%Y-%m-%d') + dateyear = time.strftime('%Y') + spatial_bbox = bbox + spatial_crs = 'WGS84' + + with open(yml) as f: + metadata = f.read() + + metadata = metadata.format(**globals(), **locals()) + + metadata = ( + f'# {name} ({codename})\n' + f'# YAML metadata control file (MCF) template for pygeometa\n{metadata}' + ) + metadata_yml = f'{region_dir}/{codename}_metadata.yml' + with open(metadata_yml, 'w') as f: + f.write(metadata) + print(f' {os.path.basename(metadata_yml)}\n') + # print(f"""Study region boundaries have been created (study_region_boundary, urban_region, urban_study_region and {buffered_urban_study_region}). If you wish to recreate these, please manually drop them (e.g. using psql) or optionally drop the {db} database and start again (e.g. 
using the subprocesses/_drop_study_region_database.py utility script.\n""") + return else: raise Exception( """Study region boundary creation failed; check configuration and log files to identify specific issues.""", ) - # output to completion log script_running_log(script, task, start, codename) engine.dispose() diff --git a/process/subprocesses/_report_functions.py b/process/subprocesses/_report_functions.py index dd5416f6..91208443 100644 --- a/process/subprocesses/_report_functions.py +++ b/process/subprocesses/_report_functions.py @@ -28,6 +28,14 @@ from subprocesses.batlow import batlow_map as cmap +def generate_metadata(region_dir, codename): + yml_in = f'{region_dir}/{codename}_metadata.yml' + xml_out = f'{region_dir}/{codename}_metadata.xml' + command = f'pygeometa metadata generate "{yml_in}" --output "{xml_out}" --schema iso19139-2' + sp.call(command, shell=True) + return xml_out + + def postgis_to_csv(file, db_host, db_user, db, db_pwd, table): """Export table from PostGIS database to CSV.""" command = ( From ccdfe28167551954feba1ea21e52221d73674728 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Mon, 3 Apr 2023 16:14:16 +1000 Subject: [PATCH 19/26] refined the metadata output code, so this just oc curs in the generate resources stage --- process/3_generate_resources.py | 36 +++++- .../subprocesses/_01_create_study_region.py | 35 +----- process/subprocesses/_report_functions.py | 118 +++++++++++++++++- 3 files changed, 148 insertions(+), 41 deletions(-) diff --git a/process/3_generate_resources.py b/process/3_generate_resources.py index 940e8acb..3dafdc31 100644 --- a/process/3_generate_resources.py +++ b/process/3_generate_resources.py @@ -4,29 +4,38 @@ import shutil import sys +from sqlalchemy import create_engine from subprocesses._project_setup import ( + authors, codename, db, db_host, db_pwd, db_user, + email, folder_path, gtfs, indicators, + individualname, + name, policies, + positionname, region_config, region_names, + time, + url, + year, ) - -# import and set up functions from subprocesses._report_functions import ( - generate_metadata, + generate_metadata_xml, + generate_metadata_yml, generate_report_for_language, get_and_setup_language_cities, + get_terminal_columns, postgis_to_csv, postgis_to_geopackage, + print_autobreak, ) -from subprocesses._utils import get_terminal_columns, print_autobreak class config: @@ -56,6 +65,9 @@ class config: def main(): + engine = create_engine( + f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', future=True, + ) # List existing generated resources print('Analysis parameter summary text file') print(f' {codename}.yml') @@ -105,7 +117,21 @@ def main(): # Generate metadata print('\nMetadata') - metadata_xml = generate_metadata(config.region['region_dir'], codename) + metadata_yml = generate_metadata_yml( + engine, + folder_path, + region_config, + codename, + name, + year, + authors, + url, + individualname, + positionname, + email, + ) + print(f' {metadata_yml}') + metadata_xml = generate_metadata_xml(config.region['region_dir'], codename) print(f' {metadata_xml}') # Generate reports diff --git a/process/subprocesses/_01_create_study_region.py b/process/subprocesses/_01_create_study_region.py index 0f80d49b..e41b95c2 100644 --- a/process/subprocesses/_01_create_study_region.py +++ b/process/subprocesses/_01_create_study_region.py @@ -190,40 +190,7 @@ def main(): and db_contents.has_table('urban_study_region') and db_contents.has_table(buffered_urban_study_region) ): - print('Updating project metadata:', end='', flush=True) - sql = 
"""SELECT ST_Extent(ST_Transform(geom,4326)) FROM urban_study_region;""" - with engine.begin() as connection: - bbox = ( - connection.execute(text(sql)) - .fetchone()[0] - .replace(' ', ',') - .replace('(', '[') - .replace(')', ']')[3:] - ) - - yml = ( - f'{folder_path}/process/configuration/assets/metadata_template.yml' - ) - datestamp = time.strftime('%Y-%m-%d') - dateyear = time.strftime('%Y') - spatial_bbox = bbox - spatial_crs = 'WGS84' - - with open(yml) as f: - metadata = f.read() - - metadata = metadata.format(**globals(), **locals()) - - metadata = ( - f'# {name} ({codename})\n' - f'# YAML metadata control file (MCF) template for pygeometa\n{metadata}' - ) - metadata_yml = f'{region_dir}/{codename}_metadata.yml' - with open(metadata_yml, 'w') as f: - f.write(metadata) - print(f' {os.path.basename(metadata_yml)}\n') - # print(f"""Study region boundaries have been created (study_region_boundary, urban_region, urban_study_region and {buffered_urban_study_region}). If you wish to recreate these, please manually drop them (e.g. using psql) or optionally drop the {db} database and start again (e.g. using the subprocesses/_drop_study_region_database.py utility script.\n""") - return + return """Study region boundaries have been created (study_region_boundary, urban_region, urban_study_region and {buffered_urban_study_region}). If you wish to recreate these, please manually drop them (e.g. using psql) or optionally drop the {db} database and start again (e.g. using the subprocesses/_drop_study_region_database.py utility script.\n""" else: raise Exception( """Study region boundary creation failed; check configuration and log files to identify specific issues.""", diff --git a/process/subprocesses/_report_functions.py b/process/subprocesses/_report_functions.py index 91208443..0e64f8a1 100644 --- a/process/subprocesses/_report_functions.py +++ b/process/subprocesses/_report_functions.py @@ -28,12 +28,126 @@ from subprocesses.batlow import batlow_map as cmap -def generate_metadata(region_dir, codename): +# 'pretty' text wrapping as per https://stackoverflow.com/questions/37572837/how-can-i-make-python-3s-print-fit-the-size-of-the-command-prompt +def get_terminal_columns(): + import shutil + + return shutil.get_terminal_size().columns + + +def print_autobreak(*args, sep=' '): + import textwrap + + width = ( + get_terminal_columns() + ) # Check size once to avoid rechecks per "paragraph" + # Convert all args to strings, join with separator, then split on any newlines, + # preserving line endings, so each "paragraph" wrapped separately + for line in sep.join(map(str, args)).splitlines(True): + # Py3's print function makes it easy to print textwrap.wrap's result as one-liner + print(*textwrap.wrap(line, width), sep='\n') + + +def wrap_autobreak(*args, sep=' '): + width = ( + get_terminal_columns() + ) # Check size once to avoid rechecks per "paragraph" + # Convert all args to strings, join with separator, then split on any newlines, + # preserving line endings, so each "paragraph" wrapped separately + for line in sep.join(map(str, args)).splitlines(True): + # Py3's print function makes it easy to print textwrap.wrap's result as one-liner + return '\n'.join(textwrap.wrap(line, width)) + + +def reproject_raster(inpath, outpath, new_crs): + import rasterio + from rasterio.warp import ( + Resampling, + calculate_default_transform, + reproject, + ) + + dst_crs = new_crs # CRS for web meractor + with rasterio.open(inpath) as src: + transform, width, height = calculate_default_transform( + src.crs, 
dst_crs, src.width, src.height, *src.bounds, + ) + kwargs = src.meta.copy() + kwargs.update( + { + 'crs': dst_crs, + 'transform': transform, + 'width': width, + 'height': height, + }, + ) + with rasterio.open(outpath, 'w', **kwargs) as dst: + for i in range(1, src.count + 1): + reproject( + source=rasterio.band(src, i), + destination=rasterio.band(dst, i), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=dst_crs, + resampling=Resampling.nearest, + ) + + +def generate_metadata_yml( + engine, + folder_path, + region_config, + codename, + name, + year, + authors, + url, + individualname, + positionname, + email, +): + """Generate YAML metadata control file.""" + sql = """SELECT ST_Extent(ST_Transform(geom,4326)) FROM urban_study_region;""" + from sqlalchemy import text + + with engine.begin() as connection: + bbox = ( + connection.execute(text(sql)) + .fetchone()[0] + .replace(' ', ',') + .replace('(', '[') + .replace(')', ']')[3:] + ) + + yml = f'{folder_path}/process/configuration/assets/metadata_template.yml' + region_dir = region_config['region_dir'] + datestamp = time.strftime('%Y-%m-%d') + dateyear = time.strftime('%Y') + spatial_bbox = bbox + spatial_crs = 'WGS84' + + with open(yml) as f: + metadata = f.read() + + metadata = metadata.format(**locals()) + metadata = ( + f'# {name} ({codename})\n' + f'# YAML metadata control file (MCF) template for pygeometa\n{metadata}' + ) + metadata_yml = f'{region_dir}/{codename}_metadata.yml' + with open(metadata_yml, 'w') as f: + f.write(metadata) + return os.path.basename(metadata_yml) + + +def generate_metadata_xml(region_dir, codename): + """Generate xml metadata given a yml metadata control file as per the specification required by pygeometa.""" yml_in = f'{region_dir}/{codename}_metadata.yml' xml_out = f'{region_dir}/{codename}_metadata.xml' command = f'pygeometa metadata generate "{yml_in}" --output "{xml_out}" --schema iso19139-2' sp.call(command, shell=True) - return xml_out + return os.path.basename(xml_out) def postgis_to_csv(file, db_host, db_user, db, db_pwd, table): From 6d9555285a6021dcb41232fff15a0985cc32222e Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Mon, 3 Apr 2023 16:21:16 +1000 Subject: [PATCH 20/26] consolidated the _report_functions.py functions into _utils.py --- process/3_generate_resources.py | 2 +- .../_04_create_population_grid.py | 36 +- process/subprocesses/_report_functions.py | 1370 ---------------- process/subprocesses/_utils.py | 1404 ++++++++++++++++- 4 files changed, 1371 insertions(+), 1441 deletions(-) delete mode 100644 process/subprocesses/_report_functions.py diff --git a/process/3_generate_resources.py b/process/3_generate_resources.py index 3dafdc31..8c0534f6 100644 --- a/process/3_generate_resources.py +++ b/process/3_generate_resources.py @@ -26,7 +26,7 @@ url, year, ) -from subprocesses._report_functions import ( +from subprocesses._utils import ( generate_metadata_xml, generate_metadata_yml, generate_report_for_language, diff --git a/process/subprocesses/_04_create_population_grid.py b/process/subprocesses/_04_create_population_grid.py index 72be20ca..8e7ca5dc 100644 --- a/process/subprocesses/_04_create_population_grid.py +++ b/process/subprocesses/_04_create_population_grid.py @@ -12,7 +12,6 @@ # Set up project and region parameters for GHSCIC analyses from _project_setup import * -from _utils import reproject_raster from geoalchemy2 import Geometry from osgeo import gdal from script_running_log import script_running_log @@ -24,6 +23,41 @@ 
gdal.SetConfigOption('CPL_LOG', '/dev/null') # Linux/MacOS +def reproject_raster(inpath, outpath, new_crs): + import rasterio + from rasterio.warp import ( + Resampling, + calculate_default_transform, + reproject, + ) + + dst_crs = new_crs # CRS for web meractor + with rasterio.open(inpath) as src: + transform, width, height = calculate_default_transform( + src.crs, dst_crs, src.width, src.height, *src.bounds, + ) + kwargs = src.meta.copy() + kwargs.update( + { + 'crs': dst_crs, + 'transform': transform, + 'width': width, + 'height': height, + }, + ) + with rasterio.open(outpath, 'w', **kwargs) as dst: + for i in range(1, src.count + 1): + reproject( + source=rasterio.band(src, i), + destination=rasterio.band(dst, i), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=dst_crs, + resampling=Resampling.nearest, + ) + + def main(): # simple timer for log file start = time.time() diff --git a/process/subprocesses/_report_functions.py b/process/subprocesses/_report_functions.py deleted file mode 100644 index 0e64f8a1..00000000 --- a/process/subprocesses/_report_functions.py +++ /dev/null @@ -1,1370 +0,0 @@ -""" -Report functions. - -Define functions used for formatting and saving indicator reports. -""" -import json -import os -import subprocess as sp -import time -from textwrap import wrap - -import fiona -import geopandas as gpd -import matplotlib as mpl -import matplotlib.font_manager as fm -import matplotlib.pyplot as plt -import matplotlib.ticker as ticker -import numpy as np -import pandas as pd -from babel.numbers import format_decimal as fnum -from babel.units import format_unit -from fpdf import FPDF, FlexTemplate -from matplotlib.cm import ScalarMappable -from matplotlib.lines import Line2D -from mpl_toolkits.axes_grid1 import make_axes_locatable -from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar -from mpl_toolkits.axes_grid1.inset_locator import inset_axes -from subprocesses.batlow import batlow_map as cmap - - -# 'pretty' text wrapping as per https://stackoverflow.com/questions/37572837/how-can-i-make-python-3s-print-fit-the-size-of-the-command-prompt -def get_terminal_columns(): - import shutil - - return shutil.get_terminal_size().columns - - -def print_autobreak(*args, sep=' '): - import textwrap - - width = ( - get_terminal_columns() - ) # Check size once to avoid rechecks per "paragraph" - # Convert all args to strings, join with separator, then split on any newlines, - # preserving line endings, so each "paragraph" wrapped separately - for line in sep.join(map(str, args)).splitlines(True): - # Py3's print function makes it easy to print textwrap.wrap's result as one-liner - print(*textwrap.wrap(line, width), sep='\n') - - -def wrap_autobreak(*args, sep=' '): - width = ( - get_terminal_columns() - ) # Check size once to avoid rechecks per "paragraph" - # Convert all args to strings, join with separator, then split on any newlines, - # preserving line endings, so each "paragraph" wrapped separately - for line in sep.join(map(str, args)).splitlines(True): - # Py3's print function makes it easy to print textwrap.wrap's result as one-liner - return '\n'.join(textwrap.wrap(line, width)) - - -def reproject_raster(inpath, outpath, new_crs): - import rasterio - from rasterio.warp import ( - Resampling, - calculate_default_transform, - reproject, - ) - - dst_crs = new_crs # CRS for web meractor - with rasterio.open(inpath) as src: - transform, width, height = calculate_default_transform( - src.crs, dst_crs, src.width, src.height, 
*src.bounds, - ) - kwargs = src.meta.copy() - kwargs.update( - { - 'crs': dst_crs, - 'transform': transform, - 'width': width, - 'height': height, - }, - ) - with rasterio.open(outpath, 'w', **kwargs) as dst: - for i in range(1, src.count + 1): - reproject( - source=rasterio.band(src, i), - destination=rasterio.band(dst, i), - src_transform=src.transform, - src_crs=src.crs, - dst_transform=transform, - dst_crs=dst_crs, - resampling=Resampling.nearest, - ) - - -def generate_metadata_yml( - engine, - folder_path, - region_config, - codename, - name, - year, - authors, - url, - individualname, - positionname, - email, -): - """Generate YAML metadata control file.""" - sql = """SELECT ST_Extent(ST_Transform(geom,4326)) FROM urban_study_region;""" - from sqlalchemy import text - - with engine.begin() as connection: - bbox = ( - connection.execute(text(sql)) - .fetchone()[0] - .replace(' ', ',') - .replace('(', '[') - .replace(')', ']')[3:] - ) - - yml = f'{folder_path}/process/configuration/assets/metadata_template.yml' - region_dir = region_config['region_dir'] - datestamp = time.strftime('%Y-%m-%d') - dateyear = time.strftime('%Y') - spatial_bbox = bbox - spatial_crs = 'WGS84' - - with open(yml) as f: - metadata = f.read() - - metadata = metadata.format(**locals()) - metadata = ( - f'# {name} ({codename})\n' - f'# YAML metadata control file (MCF) template for pygeometa\n{metadata}' - ) - metadata_yml = f'{region_dir}/{codename}_metadata.yml' - with open(metadata_yml, 'w') as f: - f.write(metadata) - return os.path.basename(metadata_yml) - - -def generate_metadata_xml(region_dir, codename): - """Generate xml metadata given a yml metadata control file as per the specification required by pygeometa.""" - yml_in = f'{region_dir}/{codename}_metadata.yml' - xml_out = f'{region_dir}/{codename}_metadata.xml' - command = f'pygeometa metadata generate "{yml_in}" --output "{xml_out}" --schema iso19139-2' - sp.call(command, shell=True) - return os.path.basename(xml_out) - - -def postgis_to_csv(file, db_host, db_user, db, db_pwd, table): - """Export table from PostGIS database to CSV.""" - command = ( - f'ogr2ogr -f "CSV" {file} ' - f'PG:"host={db_host} user={db_user} dbname={db} password={db_pwd}" ' - f' {table} ' - ) - sp.call(command, shell=True) - return file - - -def postgis_to_geopackage(gpkg, db_host, db_user, db, db_pwd, tables): - """Export selection of tables from PostGIS database to geopackage.""" - try: - os.remove(gpkg) - except FileNotFoundError: - pass - - for table in tables: - print(f' - {table}') - command = ( - f'ogr2ogr -update -overwrite -lco overwrite=yes -f GPKG {gpkg} ' - f'PG:"host={db_host} user={db_user} dbname={db} password={db_pwd}" ' - f' {table} ' - ) - sp.call(command, shell=True) - - -def get_and_setup_language_cities(config): - """Setup and return languages for given configuration.""" - if config.auto_language: - languages = pd.read_excel(config.configuration, sheet_name='languages') - languages = languages[languages['name'] == config.city].dropna( - axis=1, how='all', - ) - languages = list(languages.columns[2:]) - else: - languages = [config.language] - return languages - - -def generate_report_for_language( - config, language, indicators, policies, -): - """Generate report for a processed city in a given language.""" - city = config.city - font = get_and_setup_font(language, config) - # set up policies - city_policy = policy_data_setup(policies, config.region['policy_review']) - # get city and grid summary data - gpkg = config.region['gpkg'] - if not 
os.path.isfile(gpkg): - raise Exception( - f'\n\nRequired file {gpkg} could not be located. To proceed with report generation, please ensure that analysis for this region, including export of geopackage, has been completed successfully.\n\n', - ) - layers = fiona.listlayers(gpkg) - if not any([x.startswith(city) for x in layers]): - raise Exception( - f'\n\nResult summary layers for grid and city could not be located in the geopackage {gpkg}. To proceed with report generation, please ensure that analysis for this region has been completed successfully.\n\n', - ) - gdfs = {} - for gdf in ['city', 'grid']: - gdfs[gdf] = gpd.read_file( - gpkg, - layer=[ - layer - for layer in layers - if layer.startswith( - config.region[f'{gdf}_summary'].strip( - time.strftime('%Y-%m-%d'), - ), - ) - ][0], - ) - # The below currently relates walkability to specified reference - # (e.g. the GHSCIC 25 city median, following standardisation using - # 25-city mean and standard deviation for sub-indicators) - gdfs['grid'] = evaluate_comparative_walkability( - gdfs['grid'], indicators['report']['walkability']['ghscic_reference'], - ) - indicators['report']['walkability'][ - 'walkability_above_median_pct' - ] = evaluate_threshold_pct( - gdfs['grid'], - 'all_cities_walkability', - '>', - indicators['report']['walkability']['ghscic_walkability_reference'], - ) - for i in indicators['report']['thresholds']: - indicators['report']['thresholds'][i]['pct'] = evaluate_threshold_pct( - gdfs['grid'], - indicators['report']['thresholds'][i]['field'], - indicators['report']['thresholds'][i]['relationship'], - indicators['report']['thresholds'][i]['criteria'], - ) - # set up phrases - phrases = prepare_phrases(config, city, language) - # Generate resources - if config.generate_resources: - print(f'\nFigures and maps ({language})') - capture_return = generate_resources( - config, - gdfs['city'], - gdfs['grid'], - phrases, - indicators, - city_policy, - language, - cmap, - ) - # instantiate template - for template in config.templates: - print(f'\nReport ({template} PDF template; {language})') - capture_return = generate_scorecard( - config, phrases, indicators, city_policy, language, template, font, - ) - print(capture_return) - - -def get_and_setup_font(language, config): - """Setup and return font for given language configuration.""" - fonts = pd.read_excel(config.configuration, sheet_name='fonts') - if language.replace(' (Auto-translation)', '') in fonts.Language.unique(): - fonts = fonts.loc[ - fonts['Language'] == language.replace(' (Auto-translation)', '') - ].fillna('') - else: - fonts = fonts.loc[fonts['Language'] == 'default'].fillna('') - main_font = fonts.File.values[0].strip() - fm.fontManager.addfont(main_font) - prop = fm.FontProperties(fname=main_font) - fm.findfont(prop=prop, directory=main_font, rebuild_if_missing=True) - plt.rcParams['font.family'] = prop.get_name() - font = fonts.Font.values[0] - return font - - -def policy_data_setup(policies, policy_review): - """Returns a dictionary of policy data.""" - review = pd.read_excel(policy_review, index_col=0) - df_policy = {} - # Presence score - df_policy['Presence_rating'] = review.loc['Score']['Policy identified'] - # Quality score - df_policy['Checklist_rating'] = review.loc['Score']['Quality'] - # Presence - df_policy['Presence'] = review.loc[ - [p['Policy'] for p in policies if p['Display'] == 'Presence'] - ].apply(lambda x: x['Weight'] * x['Policy identified'], axis=1) - # GDP - df_policy['Presence_gdp'] = pd.DataFrame( - [ - { - c: p[c] - for c in p - if 
c - in ['Label', 'gdp_comparison_middle', 'gdp_comparison_upper'] - } - for p in policies - if p['Display'] == 'Presence' - ], - ) - df_policy['Presence_gdp'].columns = ['Policy', 'middle', 'upper'] - df_policy['Presence_gdp'].set_index('Policy', inplace=True) - # Urban Checklist - df_policy['Checklist'] = review.loc[ - [p['Policy'] for p in policies if p['Display'] == 'Checklist'] - ]['Checklist'] - # Public open space checklist - df_policy['POS'] = review.loc[ - [p['Policy'] for p in policies if p['Display'] == 'POS'] - ]['Checklist'] - # Public transport checklist - df_policy['PT'] = review.loc[ - [p['Policy'] for p in policies if p['Display'] == 'PT'] - ]['Checklist'] - return df_policy - - -def evaluate_comparative_walkability(gdf_grid, reference): - """Evaluate walkability relative to 25-city study reference.""" - for x in reference: - gdf_grid[f'z_{x}'] = (gdf_grid[x] - reference[x]['mean']) / reference[ - x - ]['sd'] - gdf_grid['all_cities_walkability'] = sum( - [gdf_grid[f'z_{x}'] for x in reference], - ) - return gdf_grid - - -def evaluate_threshold_pct( - gdf_grid, indicator, relationship, reference, population='pop_est', -): - """Evaluate whether a pandas series meets a threshold criteria (eg. '<' or '>'.""" - percentage = round( - 100 - * gdf_grid.query(f'{indicator} {relationship} {reference}')[ - population - ].sum() - / gdf_grid[population].sum(), - 1, - ) - return percentage - - -def generate_resources( - config, - gdf_city, - gdf_grid, - phrases, - indicators, - city_policy, - language, - cmap, -): - """ - The function prepares a series of image resources required for the global indicator score cards. - - The city_path string variable is returned, where generated resources will be stored upon successful execution. - """ - figure_path = f'{config.region["region_dir"]}/figures' - locale = phrases['locale'] - city_stats = compile_city_stats(gdf_city, indicators, phrases) - if not os.path.exists(figure_path): - os.mkdir(figure_path) - # Spatial access liveability profile - li_profile( - city_stats=city_stats, - title=phrases['Population % with access within 500m to...'], - cmap=cmap, - phrases=phrases, - path=f'{figure_path}/access_profile_{language}.jpg', - ) - print(f' figures/access_profile_{language}.jpg') - ## constrain extreme outlying walkability for representation - gdf_grid['all_cities_walkability'] = gdf_grid[ - 'all_cities_walkability' - ].apply(lambda x: -6 if x < -6 else (6 if x > 6 else x)) - # Spatial distribution maps - spatial_maps = compile_spatial_map_info( - indicators['report']['spatial_distribution_figures'], - gdf_city, - phrases, - locale, - language=language, - ) - for f in spatial_maps: - spatial_dist_map( - gdf_grid, - column=f, - range=spatial_maps[f]['range'], - label=spatial_maps[f]['label'], - tick_labels=spatial_maps[f]['tick_labels'], - cmap=cmap, - path=f'{figure_path}/{spatial_maps[f]["outfile"]}', - phrases=phrases, - locale=locale, - ) - print(f" figures/{spatial_maps[f]['outfile']}") - # Threshold maps - for scenario in indicators['report']['thresholds']: - threshold_map( - gdf_grid, - column=indicators['report']['thresholds'][scenario]['field'], - scale=indicators['report']['thresholds'][scenario]['scale'], - comparison=indicators['report']['thresholds'][scenario][ - 'criteria' - ], - label=( - f"{phrases[indicators['report']['thresholds'][scenario]['title']]} ({phrases['density_units']})" - ), - cmap=cmap, - path=f"{figure_path}/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg", - phrases=phrases, - locale=locale, 
- ) - print( - f" figures/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg", - ) - # Policy ratings - policy_rating( - range=[0, 24], - score=city_policy['Presence_rating'], - comparison=indicators['report']['policy']['comparisons']['presence'], - label='', - comparison_label=phrases['25 city comparison'], - cmap=cmap, - locale=locale, - path=f'{figure_path}/policy_presence_rating_{language}.jpg', - ) - print(f' figures/policy_presence_rating_{language}.jpg') - policy_rating( - range=[0, 57], - score=city_policy['Checklist_rating'], - comparison=indicators['report']['policy']['comparisons']['quality'], - label='', - comparison_label=phrases['25 city comparison'], - cmap=cmap, - locale=locale, - path=f'{figure_path}/policy_checklist_rating_{language}.jpg', - ) - print(f' figures/policy_checklist_rating_{language}.jpg') - return figure_path - - -def fpdf2_mm_scale(mm): - """Returns a width double that of the conversion of mm to inches. - - This has been found, via trial and error, to be useful when preparing images for display in generated PDFs using fpdf2. - """ - return 2 * mm / 25.4 - - -def _pct(value, locale, length='short'): - """Formats a percentage sign according to a given locale.""" - return format_unit(value, 'percent', locale=locale, length=length) - - -def compile_city_stats(gdf_city, indicators, phrases): - """Compile a set of city statistics with comparisons, given a processed geodataframe of city summary statistics and a dictionary of indicators including reference percentiles.""" - city_stats = {} - city_stats['access'] = gdf_city[ - indicators['report']['accessibility'].keys() - ].transpose()[0] - city_stats['access'].index = [ - indicators['report']['accessibility'][x]['title'] - if city_stats['access'][x] is not None - else f"{indicators['report']['accessibility'][x]['title']} (not evaluated)" - for x in city_stats['access'].index - ] - city_stats['access'] = city_stats['access'].fillna( - 0, - ) # for display purposes - city_stats['comparisons'] = { - indicators['report']['accessibility'][x]['title']: indicators[ - 'report' - ]['accessibility'][x]['ghscic_reference'] - for x in indicators['report']['accessibility'] - } - city_stats['percentiles'] = {} - for percentile in ['p25', 'p50', 'p75']: - city_stats['percentiles'][percentile] = [ - city_stats['comparisons'][x][percentile] - for x in city_stats['comparisons'].keys() - ] - city_stats['access'].index = [ - phrases[x] for x in city_stats['access'].index - ] - return city_stats - - -def compile_spatial_map_info( - spatial_distribution_figures, gdf_city, phrases, locale, language, -): - """ - Compile required information to produce spatial distribution figures. 
- - This is done using the information recorded in configuration/indicators.yml; specifically, indicators['report']['spatial_distribution_figures'] - """ - # effectively deep copy the supplied dictionary so its not mutable - spatial_maps = json.loads(json.dumps(spatial_distribution_figures)) - for i in spatial_maps: - for text in ['label', 'outfile']: - spatial_maps[i][text] = spatial_maps[i][text].format(**locals()) - if spatial_maps[i]['tick_labels'] is not None: - spatial_maps[i]['tick_labels'] = [ - x.format(**{'phrases': phrases}) - for x in spatial_maps[i]['tick_labels'] - ] - if i.startswith('pct_'): - city_summary_percent = _pct( - fnum(gdf_city[f'pop_{i}'].fillna(0)[0], '0.0', locale), locale, - ) - spatial_maps[i][ - 'label' - ] = f'{spatial_maps[i]["label"]} ({city_summary_percent})' - if gdf_city['pop_pct_access_500m_pt_gtfs_freq_20_score'][ - 0 - ] is None or pd.isna( - gdf_city['pop_pct_access_500m_pt_gtfs_freq_20_score'][0], - ): - spatial_maps['pct_access_500m_pt_any_score'] = spatial_maps.pop( - 'pct_access_500m_pt_gtfs_freq_20_score', - ) - spatial_maps['pct_access_500m_pt_any_score']['label'] = ( - f'{phrases["Percentage of population with access to public transport"]}\n' - f'({_pct(fnum(gdf_city["pop_pct_access_500m_pt_any_score"][0],"0.0",locale),locale)})' - ) - return spatial_maps - - -def add_scalebar( - ax, - length, - multiplier, - units, - fontproperties, - loc='upper left', - pad=0, - color='black', - frameon=False, - size_vertical=2, - locale='en', -): - """ - Adds a scalebar to matplotlib map. - - Requires import of: from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar - As a rule of thumb, a scalebar of 1/3 of feature size seems appropriate. - For example, to achieve this, calculate the variable 'length' as - - gdf_width = gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0] - scalebar_length = int(gdf_width / (3000)) - """ - scalebar = AnchoredSizeBar( - ax.transData, - length * multiplier, - format_unit(length, units, locale=locale, length='short'), - loc=loc, - pad=pad, - color=color, - frameon=frameon, - size_vertical=size_vertical, - fontproperties=fontproperties, - ) - ax.add_artist(scalebar) - - -def add_localised_north_arrow( - ax, - text='N', - xy=(1, 0.96), - textsize=14, - arrowprops=dict(facecolor='black', width=4, headwidth=8), -): - """ - Add a minimal north arrow with custom text label above it to a matplotlib map. - - This can be used to add, for example, 'N' or other language equivalent. Default placement is in upper right corner of map. - """ - arrow = ax.annotate( - '', - xy=(1, 0.96), - xycoords=ax.transAxes, - xytext=(0, -0.5), - textcoords='offset pixels', - va='center', - ha='center', - arrowprops=arrowprops, - ) - ax.annotate( - text, - xy=(0.5, 1.5), - xycoords=arrow, - va='center', - ha='center', - fontsize=textsize, - ) - - -## radar chart -def li_profile( - city_stats, - title, - cmap, - path, - phrases, - width=fpdf2_mm_scale(80), - height=fpdf2_mm_scale(80), - dpi=300, -): - """ - Generates a radar chart for city liveability profiles. - - Expands on https://www.python-graph-gallery.com/web-circular-barplot-with-matplotlib - -- A python code blog post by Yan Holtz, in turn expanding on work of Tomás Capretto and Tobias Stadler. 
- """ - figsize = (width, height) - # Values for the x axis - ANGLES = np.linspace( - 0.15, 2 * np.pi - 0.05, len(city_stats['access']), endpoint=False, - ) - VALUES = city_stats['access'].values - COMPARISON = city_stats['percentiles']['p50'] - INDICATORS = city_stats['access'].index - # Colours - GREY12 = '#1f1f1f' - norm = mpl.colors.Normalize(vmin=0, vmax=100) - COLORS = cmap(list(norm(VALUES))) - # Initialize layout in polar coordinates - textsize = 11 - fig, ax = plt.subplots(figsize=figsize, subplot_kw={'projection': 'polar'}) - # Set background color to white, both axis and figure. - fig.patch.set_facecolor('white') - ax.set_facecolor('white') - ax.set_theta_offset(1.2 * np.pi / 2) - ax.set_ylim(-50, 125) - # Add geometries to the plot ------------------------------------- - # Add bars to represent the cumulative track lengths - ax.bar(ANGLES, VALUES, color=COLORS, alpha=0.9, width=0.52, zorder=10) - # Add interquartile comparison reference lines - ax.vlines( - ANGLES, - city_stats['percentiles']['p25'], - city_stats['percentiles']['p75'], - color=GREY12, - zorder=11, - ) - # Add dots to represent the mean gain - comparison_text = '\n'.join( - wrap(phrases['25 city comparison'], 17, break_long_words=False), - ) - ax.scatter( - ANGLES, - COMPARISON, - s=60, - color=GREY12, - zorder=11, - label=comparison_text, - ) - # Add labels for the indicators - try: - LABELS = [ - '\n'.join(wrap(r, 12, break_long_words=False)) for r in INDICATORS - ] - except Exception: - LABELS = INDICATORS - # Set the labels - ax.set_xticks(ANGLES) - ax.set_xticklabels(LABELS, size=textsize) - # Remove lines for polar axis (x) - ax.xaxis.grid(False) - # Put grid lines for radial axis (y) at 0, 1000, 2000, and 3000 - ax.set_yticklabels([]) - ax.set_yticks([0, 25, 50, 75, 100]) - # Remove spines - ax.spines['start'].set_color('none') - ax.spines['polar'].set_color('none') - # Adjust padding of the x axis labels ---------------------------- - # This is going to add extra space around the labels for the - # ticks of the x axis. 
- XTICKS = ax.xaxis.get_major_ticks() - for tick in XTICKS: - tick.set_pad(10) - # Add custom annotations ----------------------------------------- - # The following represent the heights in the values of the y axis - PAD = 0 - for num in [0, 50, 100]: - ax.text( - -0.2 * np.pi / 2, - num + PAD, - f'{num}%', - ha='center', - va='center', - backgroundcolor='white', - size=textsize, - ) - # Add text to explain the meaning of the height of the bar and the - # height of the dot - ax.text( - ANGLES[0], - -50, - '\n'.join(wrap(title, 13, break_long_words=False)), - rotation=0, - ha='center', - va='center', - size=textsize, - zorder=12, - ) - angle = np.deg2rad(130) - ax.legend( - loc='lower right', - bbox_to_anchor=(0.58 + np.cos(angle) / 2, 0.46 + np.sin(angle) / 2), - ) - fig.savefig(path, dpi=dpi) - plt.close(fig) - - -## Spatial distribution mapping -def spatial_dist_map( - gdf, - column, - range, - label, - tick_labels, - cmap, - path, - width=fpdf2_mm_scale(88), - height=fpdf2_mm_scale(80), - dpi=300, - phrases={'north arrow': 'N', 'km': 'km'}, - locale='en', -): - """Spatial distribution maps using geopandas geodataframe.""" - figsize = (width, height) - textsize = 14 - fig, ax = plt.subplots(figsize=figsize) - ax.set_axis_off() - divider = make_axes_locatable(ax) # Define 'divider' for the axes - # Legend axes will be located at the 'bottom' of figure, with width '5%' of ax and - # a padding between them equal to '0.1' inches - cax = divider.append_axes('bottom', size='5%', pad=0.1) - gdf.plot( - column=column, - ax=ax, - legend=True, - vmin=range[0], - vmax=range[1], - legend_kwds={ - 'label': '\n'.join(wrap(label, 60, break_long_words=False)) - if label.find('\n') < 0 - else label, - 'orientation': 'horizontal', - }, - cax=cax, - cmap=cmap, - ) - # scalebar - add_scalebar( - ax, - length=int( - (gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0]) - / (3000), - ), - multiplier=1000, - units='kilometer', - locale=locale, - fontproperties=fm.FontProperties(size=textsize), - ) - # north arrow - add_localised_north_arrow(ax, text=phrases['north arrow']) - # axis formatting - cax.tick_params(labelsize=textsize) - cax.xaxis.label.set_size(textsize) - if tick_labels is not None: - # cax.set_xticks(cax.get_xticks().tolist()) - # cax.set_xticklabels(tick_labels) - cax.xaxis.set_major_locator(ticker.MaxNLocator(len(tick_labels))) - ticks_loc = cax.get_xticks().tolist() - cax.xaxis.set_major_locator(ticker.FixedLocator(ticks_loc)) - cax.set_xticklabels(tick_labels) - plt.tight_layout() - fig.savefig(path, dpi=dpi) - plt.close(fig) - - -def threshold_map( - gdf, - column, - comparison, - scale, - label, - cmap, - path, - width=fpdf2_mm_scale(88), - height=fpdf2_mm_scale(80), - dpi=300, - phrases={'north arrow': 'N', 'km': 'km'}, - locale='en', -): - """Create threshold indicator map.""" - figsize = (width, height) - textsize = 14 - fig, ax = plt.subplots(figsize=figsize) - ax.set_axis_off() - divider = make_axes_locatable(ax) # Define 'divider' for the axes - # Legend axes will be located at the 'bottom' of figure, with width '5%' of ax and - # a padding between them equal to '0.1' inches - cax = divider.append_axes('bottom', size='5%', pad=0.1) - gdf.plot( - column=column, - ax=ax, - legend=True, - legend_kwds={ - 'label': '\n'.join(wrap(label, 60, break_long_words=False)) - if label.find('\n') < 0 - else label, - 'orientation': 'horizontal', - }, - cax=cax, - cmap=cmap, - ) - # scalebar - add_scalebar( - ax, - length=int( - (gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0]) - 
/ (3000), - ), - multiplier=1000, - units='kilometer', - locale=locale, - fontproperties=fm.FontProperties(size=textsize), - ) - # north arrow - add_localised_north_arrow(ax, text=phrases['north arrow']) - # axis formatting - cax.xaxis.set_major_formatter(ticker.EngFormatter()) - cax.tick_params(labelsize=textsize) - cax.xaxis.label.set_size(textsize) - plt.tight_layout() - fig.savefig(path, dpi=dpi) - plt.close(fig) - - -def policy_rating( - range, - score, - cmap, - comparison=None, - width=fpdf2_mm_scale(70), - height=fpdf2_mm_scale(15), - label='Policies identified', - comparison_label='25 city median', - locale='en', - path='policy_rating_test.jpg', - dpi=300, -): - """ - Plot a score (policy rating) and optional comparison (e.g. 25 cities median score) on a colour bar. - - Applied in this context for policy presence and policy quality scores. - """ - textsize = 14 - fig, ax = plt.subplots(figsize=(width, height)) - fig.subplots_adjust(bottom=0) - cmap = cmap - norm = mpl.colors.Normalize(vmin=range[0], vmax=range[1]) - fig.colorbar( - mpl.cm.ScalarMappable(norm=norm, cmap=cmap), - cax=ax, - orientation='horizontal', - # shrink=0.9, pad=0, aspect=90 - ) - # Format Global ticks - if comparison is None: - ax.xaxis.set_ticks([]) - else: - ax.xaxis.set_major_locator(ticker.FixedLocator([comparison])) - # ax.set_xticklabels([comparison_label]) - ax.set_xticklabels(['']) - ax.tick_params(labelsize=textsize) - ax.plot( - comparison, - 0, - marker='v', - color='black', - markersize=9, - zorder=10, - clip_on=False, - ) - if comparison < 7: - for t in ax.get_yticklabels(): - t.set_horizontalalignment('left') - if comparison > 18: - for t in ax.get_yticklabels(): - t.set_horizontalalignment('right') - # Format City ticks - ax_city = ax.twiny() - ax_city.set_xlim(range) - ax_city.xaxis.set_major_locator(ticker.FixedLocator([score])) - ax_city.plot( - score, - 1, - marker='^', - color='black', - markersize=9, - zorder=10, - clip_on=False, - ) - sep = '' - # if comparison is not None and label=='': - ax_city.set_xticklabels( - [f"{sep}{str(score).rstrip('0').rstrip('.')}/{range[1]}{label}"], - ) - ax_city.tick_params(labelsize=textsize) - # return figure with final styling - xlabel = f"{comparison_label} ({fnum(comparison,'0.0',locale)})" - ax.set_xlabel( - xlabel, labelpad=0.5, fontsize=textsize, - ) - plt.tight_layout() - fig.savefig(path, dpi=dpi) - plt.close(fig) - - -def pdf_template_setup( - config, template='template_web', font=None, language='English', -): - """ - Takes a template xlsx sheet defining elements for use in fpdf2's FlexTemplate function. - - This is loosely based on the specification at https://pyfpdf.github.io/fpdf2/Templates.html - However, it has been modified to allow additional definitions which are parsed - by this function - - can define the page for which template elements are to be applied - - colours are specified using standard hexadecimal codes - Any blank cells are set to represent "None". - The function returns a dictionary of elements, indexed by page number strings. 
- """ - # read in elements - elements = pd.read_excel(config.configuration, sheet_name=template) - document_pages = elements.page.unique() - # Conditional formatting to help avoid inappropriate line breaks and gaps in Tamil and Thai - if language in ['Tamil', 'Thai']: - elements['align'] = elements['align'].replace('J', 'L') - elements.loc[ - (elements['type'] == 'T') & (elements['size'] < 12), 'size', - ] = ( - elements.loc[ - (elements['type'] == 'T') & (elements['size'] < 12), 'size', - ] - - 1 - ) - if font is not None: - elements.loc[elements.font == 'custom', 'font'] = font - elements = elements.to_dict(orient='records') - elements = [ - {k: v if not str(v) == 'nan' else None for k, v in x.items()} - for x in elements - ] - # Need to convert hexadecimal colours (eg FFFFFF is white) to - # decimal colours for the fpdf Template class to work - # We'll establish default hex colours for foreground and background - planes = {'foreground': '000000', 'background': 'FFFFFF'} - for i, element in enumerate(elements): - for plane in planes: - if elements[i][plane] is not None: - # this assumes a hexadecimal string without the 0x prefix - elements[i][plane] = int(elements[i][plane], 16) - else: - elements[i][plane] = int(planes[plane], 16) - pages = {} - for page in document_pages: - pages[f'{page}'] = [x for x in elements if x['page'] == page] - return pages - - -def format_pages(pages, phrases): - """Format pages with phrases.""" - for page in pages: - for i, item in enumerate(pages[page]): - if item['name'] in phrases: - try: - pages[page][i]['text'] = phrases[item['name']].format( - city=phrases['city_name'], - country=phrases['country_name'], - study_doi=phrases['study_doi'], - citation_series=phrases['citation_series'], - citation_doi=phrases['citation_doi'], - citation_population=phrases['citation_population'], - citation_boundaries=phrases['citation_boundaries'], - citation_features=phrases['citation_features'], - citation_colour=phrases['citation_colour'], - ) - except Exception: - pages[f'{page}'][i]['text'] = phrases[item['name']] - return pages - - -def prepare_phrases(config, city, language): - """Prepare dictionary for specific language translation given English phrase.""" - languages = pd.read_excel(config.configuration, sheet_name='languages') - phrases = json.loads(languages.set_index('name').to_json())[language] - city_details = pd.read_excel( - config.configuration, sheet_name='city_details', index_col='City', - ) - country_code = config.region['country_code'] - # set default English country code - if language == 'English' and country_code not in ['AU', 'GB', 'US']: - country_code = 'AU' - phrases['locale'] = f'{phrases["language_code"]}_{country_code}' - # extract English language variables - phrases['metadata_author'] = languages.loc[ - languages['name'] == 'title_author', 'English', - ].values[0] - phrases['metadata_title1'] = languages.loc[ - languages['name'] == 'title_series_line1', 'English', - ].values[0] - phrases['metadata_title2'] = languages.loc[ - languages['name'] == 'title_series_line2', 'English', - ].values[0] - phrases['country'] = languages.loc[ - languages['name'] == f'{city} - Country', 'English', - ].values[0] - # restrict to specific language - languages = languages.loc[ - languages['role'] == 'template', ['name', language], - ] - phrases['vernacular'] = languages.loc[ - languages['name'] == 'language', language, - ].values[0] - phrases['city_name'] = languages.loc[ - languages['name'] == city, language, - ].values[0] - phrases['country_name'] = 
languages.loc[ - languages['name'] == f'{city} - Country', language, - ].values[0] - phrases['city'] = config.region['name'] - phrases['study_doi'] = f'https://doi.org/{city_details["DOI"]["Study"]}' - phrases['city_doi'] = f'https://doi.org/{city_details["DOI"][city]}' - phrases['study_executive_names'] = city_details['Names']['Study'] - phrases['local_collaborators_names'] = city_details['Names'][city] - phrases['Image 1 file'] = city_details['Image 1 file'][city] - phrases['Image 2 file'] = city_details['Image 2 file'][city] - phrases['Image 1 credit'] = city_details['Image 1 credit'][city] - phrases['Image 2 credit'] = city_details['Image 2 credit'][city] - phrases['region_population_citation'] = config.region['population'][ - 'citation' - ] - phrases['region_urban_region_citation'] = config.region['urban_region'][ - 'citation' - ] - phrases['region_OpenStreetMap_citation'] = config.region['OpenStreetMap'][ - 'citation' - ] - # incoporating study citations - citation_json = json.loads(city_details['exceptions_json']['Study']) - # handle city-specific exceptions - city_exceptions = json.loads(city_details['exceptions_json'][city]) - if language in city_exceptions: - city_exceptions = json.loads( - city_exceptions[language].replace("'", '"'), - ) - for e in city_exceptions: - phrases[e] = city_exceptions[e].replace('|', '\n') - for citation in citation_json: - if citation != 'citation_doi' or 'citation_doi' not in phrases: - phrases[citation] = ( - citation_json[citation].replace('|', '\n').format(**phrases) - ) - phrases['citation_doi'] = phrases['citation_doi'].format(**phrases) - return phrases - - -def wrap_sentences(words, limit=50, delimiter=''): - """Wrap sentences if exceeding limit.""" - sentences = [] - sentence = '' - gap = len(delimiter) - for i, word in enumerate(words): - if i == 0: - sentence = word - continue - # combine word to sentence if under limit - if len(sentence) + gap + len(word) <= limit: - sentence = sentence + delimiter + word - else: - sentences.append(sentence) - sentence = word - # append the final word if not yet appended - if i == len(words) - 1: - sentences.append(sentence) - # finally, append sentence of all words if still below limit - if (i == len(words) - 1) and (sentences == []): - sentences.append(sentence) - return sentences - - -def prepare_pdf_fonts(pdf, config, language): - """Prepare PDF fonts.""" - fonts = pd.read_excel(config.configuration, sheet_name='fonts') - fonts = ( - fonts.loc[ - fonts['Language'].isin( - ['default', language.replace(' (Auto-translation)', '')], - ) - ] - .fillna('') - .drop_duplicates() - ) - for s in ['', 'B', 'I', 'BI']: - for langue in ['default', language]: - if ( - langue.replace(' (Auto-translation)', '') - in fonts.Language.unique() - ): - f = fonts.loc[ - ( - fonts['Language'] - == langue.replace(' (Auto-translation)', '') - ) - & (fonts['Style'] == s) - ] - if f'{f.Font.values[0]}{s}' not in pdf.fonts.keys(): - pdf.add_font( - f.Font.values[0], style=s, fname=f.File.values[0], - ) - - -def save_pdf_layout(pdf, folder, template, filename): - """Save a PDF report in template subfolder in specified location.""" - if not os.path.exists(folder): - os.mkdir(folder) - template_folder = f'{folder}/_{template} reports' - if not os.path.exists(template_folder): - os.mkdir(template_folder) - pdf.output(f'{template_folder}/{filename}') - return f' _{template} reports/{filename}'.replace('/home/ghsci/work/', '') - - -def generate_scorecard( - config, - phrases, - indicators, - city_policy, - language='English', - 
template='web', - font=None, -): - """ - Format a PDF using the pyfpdf FPDF2 library, and drawing on definitions from a UTF-8 CSV file. - - Included in this function is the marking of a policy 'scorecard', with ticks, crosses, etc. - """ - locale = phrases['locale'] - # Set up PDF document template pages - pages = pdf_template_setup(config, 'template_web', font, language) - pages = format_pages(pages, phrases) - # initialise PDF - pdf = FPDF(orientation='portrait', format='A4', unit='mm') - # set up fonts - prepare_pdf_fonts(pdf, config, language) - pdf.set_author(phrases['metadata_author']) - pdf.set_title(f"{phrases['metadata_title1']} {phrases['metadata_title2']}") - pdf.set_auto_page_break(False) - if template == 'web': - pdf = pdf_for_web( - pdf, - pages, - config, - language, - locale, - phrases, - indicators, - city_policy, - ) - elif template == 'print': - pdf = pdf_for_print( - pdf, - pages, - config, - language, - locale, - phrases, - indicators, - city_policy, - ) - # Output report pdf - filename = f"{phrases['city_name']} - {phrases['title_series_line1'].replace(':','')} - GHSCIC 2022 - {phrases['vernacular']}.pdf" - if phrases['_export'] == 1: - capture_result = save_pdf_layout( - pdf, - folder=config.region['region_dir'], - template=template, - filename=filename, - ) - return capture_result - else: - return 'Skipped.' - - -def pdf_for_web( - pdf, pages, config, language, locale, phrases, indicators, city_policy, -): - """ - Generate a PDF based on a template for web distribution. - - This template includes reporting on both policy and spatial indicators. - """ - city = config.city - city_path = config.region['region_dir'] - figure_path = f'{city_path}/figures' - # Set up Cover page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['1']) - if os.path.exists( - f'{config.folder_path}/process/configuration/assets/{phrases["Image 1 file"]}', - ): - template[ - 'hero_image' - ] = f'{config.folder_path}/process/configuration/assets/{phrases["Image 1 file"]}' - template['hero_alt'] = '' - template['Image 1 credit'] = phrases['Image 1 credit'] - template.render() - # Set up next page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['2']) - template['citations'] = phrases['citations'] - template['study_executive_names'] = phrases['study_executive_names'] - template['local_collaborators'] = template['local_collaborators'].format( - title_city=phrases['title_city'], - ) - template['local_collaborators_names'] = phrases[ - 'local_collaborators_names' - ] - if phrases['translation_names'] is None: - template['translation'] = '' - template['translation_names'] = '' - template.render() - # Set up next page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['3']) - template[ - 'introduction' - ] = f"{phrases['series_intro']}\n\n{phrases['series_interpretation']}".format( - **phrases, - ) - ## Access profile plot - template['access_profile'] = f'{figure_path}/access_profile_{language}.jpg' - ## Walkability plot - template[ - 'all_cities_walkability' - ] = f'{figure_path}/all_cities_walkability_{language}.jpg' - template['walkability_above_median_pct'] = phrases[ - 'walkability_above_median_pct' - ].format( - _pct( - fnum( - indicators['report']['walkability'][ - 'walkability_above_median_pct' - ], - '0.0', - locale, - ), - locale, - ), - ) - ## Policy ratings - template[ - 'presence_rating' - ] = f'{figure_path}/policy_presence_rating_{language}.jpg' - template[ - 'quality_rating' - ] = f'{figure_path}/policy_checklist_rating_{language}.jpg' - 
template['city_header'] = phrases['city_name'] - ## City planning requirement presence (round 0.5 up to 1) - policy_indicators = {0: '✗', 0.5: '~', 1: '✓'} - for x in range(1, 7): - # check presence - template[f'policy_urban_text{x}_response'] = policy_indicators[ - city_policy['Presence'][x - 1] - ] - # format percentage units according to locale - for gdp in ['middle', 'upper']: - template[f'policy_urban_text{x}_{gdp}'] = _pct( - float(city_policy['Presence_gdp'].iloc[x - 1][gdp]), - locale, - length='short', - ) - ## Walkable neighbourhood policy checklist - for i, policy in enumerate(city_policy['Checklist'].index): - row = i + 1 - for j, item in enumerate([x for x in city_policy['Checklist'][i]]): - col = j + 1 - template[f"policy_{'Checklist'}_text{row}_response{col}"] = item - template.render() - # Set up next page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['4']) - ## Density plots - template[ - 'local_nh_population_density' - ] = f'{figure_path}/local_nh_population_density_{language}.jpg' - template[ - 'local_nh_intersection_density' - ] = f'{figure_path}/local_nh_intersection_density_{language}.jpg' - ## Density threshold captions - for scenario in indicators['report']['thresholds']: - template[scenario] = phrases[f'optimal_range - {scenario}'].format( - _pct( - fnum( - indicators['report']['thresholds'][scenario]['pct'], - '0.0', - locale, - ), - locale, - ), - fnum( - indicators['report']['thresholds'][scenario]['criteria'], - '#,000', - locale, - ), - phrases['density_units'], - ) - if os.path.exists( - f'{config.folder_path}/process/configuration/assets/{phrases["Image 2 file"]}', - ): - template[ - 'hero_image_2' - ] = f'{config.folder_path}/process/configuration/assets/{phrases["Image 2 file"]}' - template['hero_alt_2'] = '' - template['Image 2 credit'] = phrases['Image 2 credit'] - template.render() - # Set up next page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['5']) - template[ - 'pct_access_500m_pt.jpg' - ] = f'{figure_path}/pct_access_500m_pt_{language}.jpg' - template[ - 'pct_access_500m_public_open_space_large_score' - ] = f'{figure_path}/pct_access_500m_public_open_space_large_score_{language}.jpg' - template['city_text'] = phrases[f'{city} - Summary'] - ## Checklist ratings for PT and POS - for analysis in ['PT', 'POS']: - for i, policy in enumerate(city_policy[analysis].index): - row = i + 1 - for j, item in enumerate([x for x in city_policy[analysis][i]]): - col = j + 1 - template[f'policy_{analysis}_text{row}_response{col}'] = item - template.render() - # Set up last page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['6']) - template.render() - return pdf diff --git a/process/subprocesses/_utils.py b/process/subprocesses/_utils.py index e33eb278..e33da1db 100644 --- a/process/subprocesses/_utils.py +++ b/process/subprocesses/_utils.py @@ -1,69 +1,1335 @@ -""" -Utility functions. - -Purpose: These functions may be used in other scripts to undertake specific tasks. 
-""" -import shutil -import textwrap - - -# 'pretty' text wrapping as per https://stackoverflow.com/questions/37572837/how-can-i-make-python-3s-print-fit-the-size-of-the-command-prompt -def get_terminal_columns(): - return shutil.get_terminal_size().columns - - -def print_autobreak(*args, sep=' '): - width = ( - get_terminal_columns() - ) # Check size once to avoid rechecks per "paragraph" - # Convert all args to strings, join with separator, then split on any newlines, - # preserving line endings, so each "paragraph" wrapped separately - for line in sep.join(map(str, args)).splitlines(True): - # Py3's print function makes it easy to print textwrap.wrap's result as one-liner - print(*textwrap.wrap(line, width), sep='\n') - - -def wrap_autobreak(*args, sep=' '): - width = ( - get_terminal_columns() - ) # Check size once to avoid rechecks per "paragraph" - # Convert all args to strings, join with separator, then split on any newlines, - # preserving line endings, so each "paragraph" wrapped separately - for line in sep.join(map(str, args)).splitlines(True): - # Py3's print function makes it easy to print textwrap.wrap's result as one-liner - return '\n'.join(textwrap.wrap(line, width)) - - -def reproject_raster(inpath, outpath, new_crs): - import rasterio - from rasterio.warp import ( - Resampling, - calculate_default_transform, - reproject, - ) - - dst_crs = new_crs # CRS for web meractor - with rasterio.open(inpath) as src: - transform, width, height = calculate_default_transform( - src.crs, dst_crs, src.width, src.height, *src.bounds, - ) - kwargs = src.meta.copy() - kwargs.update( - { - 'crs': dst_crs, - 'transform': transform, - 'width': width, - 'height': height, - }, - ) - with rasterio.open(outpath, 'w', **kwargs) as dst: - for i in range(1, src.count + 1): - reproject( - source=rasterio.band(src, i), - destination=rasterio.band(dst, i), - src_transform=src.transform, - src_crs=src.crs, - dst_transform=transform, - dst_crs=dst_crs, - resampling=Resampling.nearest, - ) +""" +Report functions. + +Define functions used for formatting and saving indicator reports. 
+""" +import json +import os +import subprocess as sp +import time +from textwrap import wrap + +import fiona +import geopandas as gpd +import matplotlib as mpl +import matplotlib.font_manager as fm +import matplotlib.pyplot as plt +import matplotlib.ticker as ticker +import numpy as np +import pandas as pd +from babel.numbers import format_decimal as fnum +from babel.units import format_unit +from fpdf import FPDF, FlexTemplate +from matplotlib.cm import ScalarMappable +from matplotlib.lines import Line2D +from mpl_toolkits.axes_grid1 import make_axes_locatable +from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar +from mpl_toolkits.axes_grid1.inset_locator import inset_axes +from subprocesses.batlow import batlow_map as cmap + + +# 'pretty' text wrapping as per https://stackoverflow.com/questions/37572837/how-can-i-make-python-3s-print-fit-the-size-of-the-command-prompt +def get_terminal_columns(): + import shutil + + return shutil.get_terminal_size().columns + + +def print_autobreak(*args, sep=' '): + import textwrap + + width = ( + get_terminal_columns() + ) # Check size once to avoid rechecks per "paragraph" + # Convert all args to strings, join with separator, then split on any newlines, + # preserving line endings, so each "paragraph" wrapped separately + for line in sep.join(map(str, args)).splitlines(True): + # Py3's print function makes it easy to print textwrap.wrap's result as one-liner + print(*textwrap.wrap(line, width), sep='\n') + + +def wrap_autobreak(*args, sep=' '): + width = ( + get_terminal_columns() + ) # Check size once to avoid rechecks per "paragraph" + # Convert all args to strings, join with separator, then split on any newlines, + # preserving line endings, so each "paragraph" wrapped separately + for line in sep.join(map(str, args)).splitlines(True): + # Py3's print function makes it easy to print textwrap.wrap's result as one-liner + return '\n'.join(textwrap.wrap(line, width)) + + +def generate_metadata_yml( + engine, + folder_path, + region_config, + codename, + name, + year, + authors, + url, + individualname, + positionname, + email, +): + """Generate YAML metadata control file.""" + sql = """SELECT ST_Extent(ST_Transform(geom,4326)) FROM urban_study_region;""" + from sqlalchemy import text + + with engine.begin() as connection: + bbox = ( + connection.execute(text(sql)) + .fetchone()[0] + .replace(' ', ',') + .replace('(', '[') + .replace(')', ']')[3:] + ) + + yml = f'{folder_path}/process/configuration/assets/metadata_template.yml' + region_dir = region_config['region_dir'] + datestamp = time.strftime('%Y-%m-%d') + dateyear = time.strftime('%Y') + spatial_bbox = bbox + spatial_crs = 'WGS84' + + with open(yml) as f: + metadata = f.read() + + metadata = metadata.format(**locals()) + metadata = ( + f'# {name} ({codename})\n' + f'# YAML metadata control file (MCF) template for pygeometa\n{metadata}' + ) + metadata_yml = f'{region_dir}/{codename}_metadata.yml' + with open(metadata_yml, 'w') as f: + f.write(metadata) + return os.path.basename(metadata_yml) + + +def generate_metadata_xml(region_dir, codename): + """Generate xml metadata given a yml metadata control file as per the specification required by pygeometa.""" + yml_in = f'{region_dir}/{codename}_metadata.yml' + xml_out = f'{region_dir}/{codename}_metadata.xml' + command = f'pygeometa metadata generate "{yml_in}" --output "{xml_out}" --schema iso19139-2' + sp.call(command, shell=True) + return os.path.basename(xml_out) + + +def postgis_to_csv(file, db_host, db_user, db, db_pwd, table): 
+ """Export table from PostGIS database to CSV.""" + command = ( + f'ogr2ogr -f "CSV" {file} ' + f'PG:"host={db_host} user={db_user} dbname={db} password={db_pwd}" ' + f' {table} ' + ) + sp.call(command, shell=True) + return file + + +def postgis_to_geopackage(gpkg, db_host, db_user, db, db_pwd, tables): + """Export selection of tables from PostGIS database to geopackage.""" + try: + os.remove(gpkg) + except FileNotFoundError: + pass + + for table in tables: + print(f' - {table}') + command = ( + f'ogr2ogr -update -overwrite -lco overwrite=yes -f GPKG {gpkg} ' + f'PG:"host={db_host} user={db_user} dbname={db} password={db_pwd}" ' + f' {table} ' + ) + sp.call(command, shell=True) + + +def get_and_setup_language_cities(config): + """Setup and return languages for given configuration.""" + if config.auto_language: + languages = pd.read_excel(config.configuration, sheet_name='languages') + languages = languages[languages['name'] == config.city].dropna( + axis=1, how='all', + ) + languages = list(languages.columns[2:]) + else: + languages = [config.language] + return languages + + +def generate_report_for_language( + config, language, indicators, policies, +): + """Generate report for a processed city in a given language.""" + city = config.city + font = get_and_setup_font(language, config) + # set up policies + city_policy = policy_data_setup(policies, config.region['policy_review']) + # get city and grid summary data + gpkg = config.region['gpkg'] + if not os.path.isfile(gpkg): + raise Exception( + f'\n\nRequired file {gpkg} could not be located. To proceed with report generation, please ensure that analysis for this region, including export of geopackage, has been completed successfully.\n\n', + ) + layers = fiona.listlayers(gpkg) + if not any([x.startswith(city) for x in layers]): + raise Exception( + f'\n\nResult summary layers for grid and city could not be located in the geopackage {gpkg}. To proceed with report generation, please ensure that analysis for this region has been completed successfully.\n\n', + ) + gdfs = {} + for gdf in ['city', 'grid']: + gdfs[gdf] = gpd.read_file( + gpkg, + layer=[ + layer + for layer in layers + if layer.startswith( + config.region[f'{gdf}_summary'].strip( + time.strftime('%Y-%m-%d'), + ), + ) + ][0], + ) + # The below currently relates walkability to specified reference + # (e.g. 
the GHSCIC 25 city median, following standardisation using + # 25-city mean and standard deviation for sub-indicators) + gdfs['grid'] = evaluate_comparative_walkability( + gdfs['grid'], indicators['report']['walkability']['ghscic_reference'], + ) + indicators['report']['walkability'][ + 'walkability_above_median_pct' + ] = evaluate_threshold_pct( + gdfs['grid'], + 'all_cities_walkability', + '>', + indicators['report']['walkability']['ghscic_walkability_reference'], + ) + for i in indicators['report']['thresholds']: + indicators['report']['thresholds'][i]['pct'] = evaluate_threshold_pct( + gdfs['grid'], + indicators['report']['thresholds'][i]['field'], + indicators['report']['thresholds'][i]['relationship'], + indicators['report']['thresholds'][i]['criteria'], + ) + # set up phrases + phrases = prepare_phrases(config, city, language) + # Generate resources + if config.generate_resources: + print(f'\nFigures and maps ({language})') + capture_return = generate_resources( + config, + gdfs['city'], + gdfs['grid'], + phrases, + indicators, + city_policy, + language, + cmap, + ) + # instantiate template + for template in config.templates: + print(f'\nReport ({template} PDF template; {language})') + capture_return = generate_scorecard( + config, phrases, indicators, city_policy, language, template, font, + ) + print(capture_return) + + +def get_and_setup_font(language, config): + """Setup and return font for given language configuration.""" + fonts = pd.read_excel(config.configuration, sheet_name='fonts') + if language.replace(' (Auto-translation)', '') in fonts.Language.unique(): + fonts = fonts.loc[ + fonts['Language'] == language.replace(' (Auto-translation)', '') + ].fillna('') + else: + fonts = fonts.loc[fonts['Language'] == 'default'].fillna('') + main_font = fonts.File.values[0].strip() + fm.fontManager.addfont(main_font) + prop = fm.FontProperties(fname=main_font) + fm.findfont(prop=prop, directory=main_font, rebuild_if_missing=True) + plt.rcParams['font.family'] = prop.get_name() + font = fonts.Font.values[0] + return font + + +def policy_data_setup(policies, policy_review): + """Returns a dictionary of policy data.""" + review = pd.read_excel(policy_review, index_col=0) + df_policy = {} + # Presence score + df_policy['Presence_rating'] = review.loc['Score']['Policy identified'] + # Quality score + df_policy['Checklist_rating'] = review.loc['Score']['Quality'] + # Presence + df_policy['Presence'] = review.loc[ + [p['Policy'] for p in policies if p['Display'] == 'Presence'] + ].apply(lambda x: x['Weight'] * x['Policy identified'], axis=1) + # GDP + df_policy['Presence_gdp'] = pd.DataFrame( + [ + { + c: p[c] + for c in p + if c + in ['Label', 'gdp_comparison_middle', 'gdp_comparison_upper'] + } + for p in policies + if p['Display'] == 'Presence' + ], + ) + df_policy['Presence_gdp'].columns = ['Policy', 'middle', 'upper'] + df_policy['Presence_gdp'].set_index('Policy', inplace=True) + # Urban Checklist + df_policy['Checklist'] = review.loc[ + [p['Policy'] for p in policies if p['Display'] == 'Checklist'] + ]['Checklist'] + # Public open space checklist + df_policy['POS'] = review.loc[ + [p['Policy'] for p in policies if p['Display'] == 'POS'] + ]['Checklist'] + # Public transport checklist + df_policy['PT'] = review.loc[ + [p['Policy'] for p in policies if p['Display'] == 'PT'] + ]['Checklist'] + return df_policy + + +def evaluate_comparative_walkability(gdf_grid, reference): + """Evaluate walkability relative to 25-city study reference.""" + for x in reference: + gdf_grid[f'z_{x}'] = 
(gdf_grid[x] - reference[x]['mean']) / reference[ + x + ]['sd'] + gdf_grid['all_cities_walkability'] = sum( + [gdf_grid[f'z_{x}'] for x in reference], + ) + return gdf_grid + + +def evaluate_threshold_pct( + gdf_grid, indicator, relationship, reference, population='pop_est', +): + """Evaluate whether a pandas series meets a threshold criteria (eg. '<' or '>'.""" + percentage = round( + 100 + * gdf_grid.query(f'{indicator} {relationship} {reference}')[ + population + ].sum() + / gdf_grid[population].sum(), + 1, + ) + return percentage + + +def generate_resources( + config, + gdf_city, + gdf_grid, + phrases, + indicators, + city_policy, + language, + cmap, +): + """ + The function prepares a series of image resources required for the global indicator score cards. + + The city_path string variable is returned, where generated resources will be stored upon successful execution. + """ + figure_path = f'{config.region["region_dir"]}/figures' + locale = phrases['locale'] + city_stats = compile_city_stats(gdf_city, indicators, phrases) + if not os.path.exists(figure_path): + os.mkdir(figure_path) + # Spatial access liveability profile + li_profile( + city_stats=city_stats, + title=phrases['Population % with access within 500m to...'], + cmap=cmap, + phrases=phrases, + path=f'{figure_path}/access_profile_{language}.jpg', + ) + print(f' figures/access_profile_{language}.jpg') + ## constrain extreme outlying walkability for representation + gdf_grid['all_cities_walkability'] = gdf_grid[ + 'all_cities_walkability' + ].apply(lambda x: -6 if x < -6 else (6 if x > 6 else x)) + # Spatial distribution maps + spatial_maps = compile_spatial_map_info( + indicators['report']['spatial_distribution_figures'], + gdf_city, + phrases, + locale, + language=language, + ) + for f in spatial_maps: + spatial_dist_map( + gdf_grid, + column=f, + range=spatial_maps[f]['range'], + label=spatial_maps[f]['label'], + tick_labels=spatial_maps[f]['tick_labels'], + cmap=cmap, + path=f'{figure_path}/{spatial_maps[f]["outfile"]}', + phrases=phrases, + locale=locale, + ) + print(f" figures/{spatial_maps[f]['outfile']}") + # Threshold maps + for scenario in indicators['report']['thresholds']: + threshold_map( + gdf_grid, + column=indicators['report']['thresholds'][scenario]['field'], + scale=indicators['report']['thresholds'][scenario]['scale'], + comparison=indicators['report']['thresholds'][scenario][ + 'criteria' + ], + label=( + f"{phrases[indicators['report']['thresholds'][scenario]['title']]} ({phrases['density_units']})" + ), + cmap=cmap, + path=f"{figure_path}/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg", + phrases=phrases, + locale=locale, + ) + print( + f" figures/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg", + ) + # Policy ratings + policy_rating( + range=[0, 24], + score=city_policy['Presence_rating'], + comparison=indicators['report']['policy']['comparisons']['presence'], + label='', + comparison_label=phrases['25 city comparison'], + cmap=cmap, + locale=locale, + path=f'{figure_path}/policy_presence_rating_{language}.jpg', + ) + print(f' figures/policy_presence_rating_{language}.jpg') + policy_rating( + range=[0, 57], + score=city_policy['Checklist_rating'], + comparison=indicators['report']['policy']['comparisons']['quality'], + label='', + comparison_label=phrases['25 city comparison'], + cmap=cmap, + locale=locale, + path=f'{figure_path}/policy_checklist_rating_{language}.jpg', + ) + print(f' figures/policy_checklist_rating_{language}.jpg') + return figure_path + 
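For orientation, the evaluate_threshold_pct helper defined above reduces the grid to a single population-weighted percentage: the share of residents living in cells that meet a threshold criterion. A minimal sketch of how it could be exercised, assuming only pandas, a toy three-cell grid with a 'pop_est' column, and a purely illustrative density threshold (5700 per km2), is:

    import pandas as pd

    # toy grid: three cells with hypothetical neighbourhood densities and population estimates
    toy_grid = pd.DataFrame({
        'local_nh_population_density': [3500, 8000, 12000],
        'pop_est': [1000, 2000, 3000],
    })
    # population share of cells meeting the illustrative criterion
    print(evaluate_threshold_pct(toy_grid, 'local_nh_population_density', '>=', 5700))

With these toy values, the two denser cells hold 5,000 of the 6,000 estimated residents, so the function returns 83.3.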
+
+def fpdf2_mm_scale(mm):
+    """Returns a width double that of the conversion of mm to inches.
+
+    This has been found, via trial and error, to be useful when preparing images for display in generated PDFs using fpdf2.
+    """
+    return 2 * mm / 25.4
+
+
+def _pct(value, locale, length='short'):
+    """Format a value as a percentage according to a given locale."""
+    return format_unit(value, 'percent', locale=locale, length=length)
+
+
+def compile_city_stats(gdf_city, indicators, phrases):
+    """Compile a set of city statistics with comparisons, given a processed geodataframe of city summary statistics and a dictionary of indicators including reference percentiles."""
+    city_stats = {}
+    city_stats['access'] = gdf_city[
+        indicators['report']['accessibility'].keys()
+    ].transpose()[0]
+    city_stats['access'].index = [
+        indicators['report']['accessibility'][x]['title']
+        if city_stats['access'][x] is not None
+        else f"{indicators['report']['accessibility'][x]['title']} (not evaluated)"
+        for x in city_stats['access'].index
+    ]
+    city_stats['access'] = city_stats['access'].fillna(
+        0,
+    )  # for display purposes
+    city_stats['comparisons'] = {
+        indicators['report']['accessibility'][x]['title']: indicators[
+            'report'
+        ]['accessibility'][x]['ghscic_reference']
+        for x in indicators['report']['accessibility']
+    }
+    city_stats['percentiles'] = {}
+    for percentile in ['p25', 'p50', 'p75']:
+        city_stats['percentiles'][percentile] = [
+            city_stats['comparisons'][x][percentile]
+            for x in city_stats['comparisons'].keys()
+        ]
+    city_stats['access'].index = [
+        phrases[x] for x in city_stats['access'].index
+    ]
+    return city_stats
+
+
+def compile_spatial_map_info(
+    spatial_distribution_figures, gdf_city, phrases, locale, language,
+):
+    """
+    Compile required information to produce spatial distribution figures.
+
+    This is done using the information recorded in configuration/indicators.yml; specifically, indicators['report']['spatial_distribution_figures']
+    """
+    # effectively deep copy the supplied dictionary so the original is not mutated
+    spatial_maps = json.loads(json.dumps(spatial_distribution_figures))
+    for i in spatial_maps:
+        for text in ['label', 'outfile']:
+            spatial_maps[i][text] = spatial_maps[i][text].format(**locals())
+        if spatial_maps[i]['tick_labels'] is not None:
+            spatial_maps[i]['tick_labels'] = [
+                x.format(**{'phrases': phrases})
+                for x in spatial_maps[i]['tick_labels']
+            ]
+        if i.startswith('pct_'):
+            city_summary_percent = _pct(
+                fnum(gdf_city[f'pop_{i}'].fillna(0)[0], '0.0', locale), locale,
+            )
+            spatial_maps[i][
+                'label'
+            ] = f'{spatial_maps[i]["label"]} ({city_summary_percent})'
+    if gdf_city['pop_pct_access_500m_pt_gtfs_freq_20_score'][
+        0
+    ] is None or pd.isna(
+        gdf_city['pop_pct_access_500m_pt_gtfs_freq_20_score'][0],
+    ):
+        spatial_maps['pct_access_500m_pt_any_score'] = spatial_maps.pop(
+            'pct_access_500m_pt_gtfs_freq_20_score',
+        )
+        spatial_maps['pct_access_500m_pt_any_score']['label'] = (
+            f'{phrases["Percentage of population with access to public transport"]}\n'
+            f'({_pct(fnum(gdf_city["pop_pct_access_500m_pt_any_score"][0],"0.0",locale),locale)})'
+        )
+    return spatial_maps
+
+
+def add_scalebar(
+    ax,
+    length,
+    multiplier,
+    units,
+    fontproperties,
+    loc='upper left',
+    pad=0,
+    color='black',
+    frameon=False,
+    size_vertical=2,
+    locale='en',
+):
+    """
+    Adds a scalebar to matplotlib map.
+
+    Requires import of: from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
+    As a rule of thumb, a scalebar of 1/3 of feature size seems appropriate.
+    For example, to achieve this, calculate the variable 'length' as
+
+        gdf_width = gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0]
+        scalebar_length = int(gdf_width / (3000))
+    """
+    scalebar = AnchoredSizeBar(
+        ax.transData,
+        length * multiplier,
+        format_unit(length, units, locale=locale, length='short'),
+        loc=loc,
+        pad=pad,
+        color=color,
+        frameon=frameon,
+        size_vertical=size_vertical,
+        fontproperties=fontproperties,
+    )
+    ax.add_artist(scalebar)
+
+
+def add_localised_north_arrow(
+    ax,
+    text='N',
+    xy=(1, 0.96),
+    textsize=14,
+    arrowprops=dict(facecolor='black', width=4, headwidth=8),
+):
+    """
+    Add a minimal north arrow with custom text label above it to a matplotlib map.
+
+    This can be used to add, for example, 'N' or other language equivalent. Default placement is in upper right corner of map.
+    """
+    arrow = ax.annotate(
+        '',
+        xy=(1, 0.96),
+        xycoords=ax.transAxes,
+        xytext=(0, -0.5),
+        textcoords='offset pixels',
+        va='center',
+        ha='center',
+        arrowprops=arrowprops,
+    )
+    ax.annotate(
+        text,
+        xy=(0.5, 1.5),
+        xycoords=arrow,
+        va='center',
+        ha='center',
+        fontsize=textsize,
+    )
+
+
+## radar chart
+def li_profile(
+    city_stats,
+    title,
+    cmap,
+    path,
+    phrases,
+    width=fpdf2_mm_scale(80),
+    height=fpdf2_mm_scale(80),
+    dpi=300,
+):
+    """
+    Generates a radar chart for city liveability profiles.
+
+    Expands on https://www.python-graph-gallery.com/web-circular-barplot-with-matplotlib
+    -- A python code blog post by Yan Holtz, in turn expanding on work of Tomás Capretto and Tobias Stadler.
+    """
+    figsize = (width, height)
+    # Values for the x axis
+    ANGLES = np.linspace(
+        0.15, 2 * np.pi - 0.05, len(city_stats['access']), endpoint=False,
+    )
+    VALUES = city_stats['access'].values
+    COMPARISON = city_stats['percentiles']['p50']
+    INDICATORS = city_stats['access'].index
+    # Colours
+    GREY12 = '#1f1f1f'
+    norm = mpl.colors.Normalize(vmin=0, vmax=100)
+    COLORS = cmap(list(norm(VALUES)))
+    # Initialize layout in polar coordinates
+    textsize = 11
+    fig, ax = plt.subplots(figsize=figsize, subplot_kw={'projection': 'polar'})
+    # Set background color to white, both axis and figure.
+ fig.patch.set_facecolor('white') + ax.set_facecolor('white') + ax.set_theta_offset(1.2 * np.pi / 2) + ax.set_ylim(-50, 125) + # Add geometries to the plot ------------------------------------- + # Add bars to represent the cumulative track lengths + ax.bar(ANGLES, VALUES, color=COLORS, alpha=0.9, width=0.52, zorder=10) + # Add interquartile comparison reference lines + ax.vlines( + ANGLES, + city_stats['percentiles']['p25'], + city_stats['percentiles']['p75'], + color=GREY12, + zorder=11, + ) + # Add dots to represent the mean gain + comparison_text = '\n'.join( + wrap(phrases['25 city comparison'], 17, break_long_words=False), + ) + ax.scatter( + ANGLES, + COMPARISON, + s=60, + color=GREY12, + zorder=11, + label=comparison_text, + ) + # Add labels for the indicators + try: + LABELS = [ + '\n'.join(wrap(r, 12, break_long_words=False)) for r in INDICATORS + ] + except Exception: + LABELS = INDICATORS + # Set the labels + ax.set_xticks(ANGLES) + ax.set_xticklabels(LABELS, size=textsize) + # Remove lines for polar axis (x) + ax.xaxis.grid(False) + # Put grid lines for radial axis (y) at 0, 1000, 2000, and 3000 + ax.set_yticklabels([]) + ax.set_yticks([0, 25, 50, 75, 100]) + # Remove spines + ax.spines['start'].set_color('none') + ax.spines['polar'].set_color('none') + # Adjust padding of the x axis labels ---------------------------- + # This is going to add extra space around the labels for the + # ticks of the x axis. + XTICKS = ax.xaxis.get_major_ticks() + for tick in XTICKS: + tick.set_pad(10) + # Add custom annotations ----------------------------------------- + # The following represent the heights in the values of the y axis + PAD = 0 + for num in [0, 50, 100]: + ax.text( + -0.2 * np.pi / 2, + num + PAD, + f'{num}%', + ha='center', + va='center', + backgroundcolor='white', + size=textsize, + ) + # Add text to explain the meaning of the height of the bar and the + # height of the dot + ax.text( + ANGLES[0], + -50, + '\n'.join(wrap(title, 13, break_long_words=False)), + rotation=0, + ha='center', + va='center', + size=textsize, + zorder=12, + ) + angle = np.deg2rad(130) + ax.legend( + loc='lower right', + bbox_to_anchor=(0.58 + np.cos(angle) / 2, 0.46 + np.sin(angle) / 2), + ) + fig.savefig(path, dpi=dpi) + plt.close(fig) + + +## Spatial distribution mapping +def spatial_dist_map( + gdf, + column, + range, + label, + tick_labels, + cmap, + path, + width=fpdf2_mm_scale(88), + height=fpdf2_mm_scale(80), + dpi=300, + phrases={'north arrow': 'N', 'km': 'km'}, + locale='en', +): + """Spatial distribution maps using geopandas geodataframe.""" + figsize = (width, height) + textsize = 14 + fig, ax = plt.subplots(figsize=figsize) + ax.set_axis_off() + divider = make_axes_locatable(ax) # Define 'divider' for the axes + # Legend axes will be located at the 'bottom' of figure, with width '5%' of ax and + # a padding between them equal to '0.1' inches + cax = divider.append_axes('bottom', size='5%', pad=0.1) + gdf.plot( + column=column, + ax=ax, + legend=True, + vmin=range[0], + vmax=range[1], + legend_kwds={ + 'label': '\n'.join(wrap(label, 60, break_long_words=False)) + if label.find('\n') < 0 + else label, + 'orientation': 'horizontal', + }, + cax=cax, + cmap=cmap, + ) + # scalebar + add_scalebar( + ax, + length=int( + (gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0]) + / (3000), + ), + multiplier=1000, + units='kilometer', + locale=locale, + fontproperties=fm.FontProperties(size=textsize), + ) + # north arrow + add_localised_north_arrow(ax, text=phrases['north arrow']) + # axis 
formatting + cax.tick_params(labelsize=textsize) + cax.xaxis.label.set_size(textsize) + if tick_labels is not None: + # cax.set_xticks(cax.get_xticks().tolist()) + # cax.set_xticklabels(tick_labels) + cax.xaxis.set_major_locator(ticker.MaxNLocator(len(tick_labels))) + ticks_loc = cax.get_xticks().tolist() + cax.xaxis.set_major_locator(ticker.FixedLocator(ticks_loc)) + cax.set_xticklabels(tick_labels) + plt.tight_layout() + fig.savefig(path, dpi=dpi) + plt.close(fig) + + +def threshold_map( + gdf, + column, + comparison, + scale, + label, + cmap, + path, + width=fpdf2_mm_scale(88), + height=fpdf2_mm_scale(80), + dpi=300, + phrases={'north arrow': 'N', 'km': 'km'}, + locale='en', +): + """Create threshold indicator map.""" + figsize = (width, height) + textsize = 14 + fig, ax = plt.subplots(figsize=figsize) + ax.set_axis_off() + divider = make_axes_locatable(ax) # Define 'divider' for the axes + # Legend axes will be located at the 'bottom' of figure, with width '5%' of ax and + # a padding between them equal to '0.1' inches + cax = divider.append_axes('bottom', size='5%', pad=0.1) + gdf.plot( + column=column, + ax=ax, + legend=True, + legend_kwds={ + 'label': '\n'.join(wrap(label, 60, break_long_words=False)) + if label.find('\n') < 0 + else label, + 'orientation': 'horizontal', + }, + cax=cax, + cmap=cmap, + ) + # scalebar + add_scalebar( + ax, + length=int( + (gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0]) + / (3000), + ), + multiplier=1000, + units='kilometer', + locale=locale, + fontproperties=fm.FontProperties(size=textsize), + ) + # north arrow + add_localised_north_arrow(ax, text=phrases['north arrow']) + # axis formatting + cax.xaxis.set_major_formatter(ticker.EngFormatter()) + cax.tick_params(labelsize=textsize) + cax.xaxis.label.set_size(textsize) + plt.tight_layout() + fig.savefig(path, dpi=dpi) + plt.close(fig) + + +def policy_rating( + range, + score, + cmap, + comparison=None, + width=fpdf2_mm_scale(70), + height=fpdf2_mm_scale(15), + label='Policies identified', + comparison_label='25 city median', + locale='en', + path='policy_rating_test.jpg', + dpi=300, +): + """ + Plot a score (policy rating) and optional comparison (e.g. 25 cities median score) on a colour bar. + + Applied in this context for policy presence and policy quality scores. 
+ """ + textsize = 14 + fig, ax = plt.subplots(figsize=(width, height)) + fig.subplots_adjust(bottom=0) + cmap = cmap + norm = mpl.colors.Normalize(vmin=range[0], vmax=range[1]) + fig.colorbar( + mpl.cm.ScalarMappable(norm=norm, cmap=cmap), + cax=ax, + orientation='horizontal', + # shrink=0.9, pad=0, aspect=90 + ) + # Format Global ticks + if comparison is None: + ax.xaxis.set_ticks([]) + else: + ax.xaxis.set_major_locator(ticker.FixedLocator([comparison])) + # ax.set_xticklabels([comparison_label]) + ax.set_xticklabels(['']) + ax.tick_params(labelsize=textsize) + ax.plot( + comparison, + 0, + marker='v', + color='black', + markersize=9, + zorder=10, + clip_on=False, + ) + if comparison < 7: + for t in ax.get_yticklabels(): + t.set_horizontalalignment('left') + if comparison > 18: + for t in ax.get_yticklabels(): + t.set_horizontalalignment('right') + # Format City ticks + ax_city = ax.twiny() + ax_city.set_xlim(range) + ax_city.xaxis.set_major_locator(ticker.FixedLocator([score])) + ax_city.plot( + score, + 1, + marker='^', + color='black', + markersize=9, + zorder=10, + clip_on=False, + ) + sep = '' + # if comparison is not None and label=='': + ax_city.set_xticklabels( + [f"{sep}{str(score).rstrip('0').rstrip('.')}/{range[1]}{label}"], + ) + ax_city.tick_params(labelsize=textsize) + # return figure with final styling + xlabel = f"{comparison_label} ({fnum(comparison,'0.0',locale)})" + ax.set_xlabel( + xlabel, labelpad=0.5, fontsize=textsize, + ) + plt.tight_layout() + fig.savefig(path, dpi=dpi) + plt.close(fig) + + +def pdf_template_setup( + config, template='template_web', font=None, language='English', +): + """ + Takes a template xlsx sheet defining elements for use in fpdf2's FlexTemplate function. + + This is loosely based on the specification at https://pyfpdf.github.io/fpdf2/Templates.html + However, it has been modified to allow additional definitions which are parsed + by this function + - can define the page for which template elements are to be applied + - colours are specified using standard hexadecimal codes + Any blank cells are set to represent "None". + The function returns a dictionary of elements, indexed by page number strings. 
+ """ + # read in elements + elements = pd.read_excel(config.configuration, sheet_name=template) + document_pages = elements.page.unique() + # Conditional formatting to help avoid inappropriate line breaks and gaps in Tamil and Thai + if language in ['Tamil', 'Thai']: + elements['align'] = elements['align'].replace('J', 'L') + elements.loc[ + (elements['type'] == 'T') & (elements['size'] < 12), 'size', + ] = ( + elements.loc[ + (elements['type'] == 'T') & (elements['size'] < 12), 'size', + ] + - 1 + ) + if font is not None: + elements.loc[elements.font == 'custom', 'font'] = font + elements = elements.to_dict(orient='records') + elements = [ + {k: v if not str(v) == 'nan' else None for k, v in x.items()} + for x in elements + ] + # Need to convert hexadecimal colours (eg FFFFFF is white) to + # decimal colours for the fpdf Template class to work + # We'll establish default hex colours for foreground and background + planes = {'foreground': '000000', 'background': 'FFFFFF'} + for i, element in enumerate(elements): + for plane in planes: + if elements[i][plane] is not None: + # this assumes a hexadecimal string without the 0x prefix + elements[i][plane] = int(elements[i][plane], 16) + else: + elements[i][plane] = int(planes[plane], 16) + pages = {} + for page in document_pages: + pages[f'{page}'] = [x for x in elements if x['page'] == page] + return pages + + +def format_pages(pages, phrases): + """Format pages with phrases.""" + for page in pages: + for i, item in enumerate(pages[page]): + if item['name'] in phrases: + try: + pages[page][i]['text'] = phrases[item['name']].format( + city=phrases['city_name'], + country=phrases['country_name'], + study_doi=phrases['study_doi'], + citation_series=phrases['citation_series'], + citation_doi=phrases['citation_doi'], + citation_population=phrases['citation_population'], + citation_boundaries=phrases['citation_boundaries'], + citation_features=phrases['citation_features'], + citation_colour=phrases['citation_colour'], + ) + except Exception: + pages[f'{page}'][i]['text'] = phrases[item['name']] + return pages + + +def prepare_phrases(config, city, language): + """Prepare dictionary for specific language translation given English phrase.""" + languages = pd.read_excel(config.configuration, sheet_name='languages') + phrases = json.loads(languages.set_index('name').to_json())[language] + city_details = pd.read_excel( + config.configuration, sheet_name='city_details', index_col='City', + ) + country_code = config.region['country_code'] + # set default English country code + if language == 'English' and country_code not in ['AU', 'GB', 'US']: + country_code = 'AU' + phrases['locale'] = f'{phrases["language_code"]}_{country_code}' + # extract English language variables + phrases['metadata_author'] = languages.loc[ + languages['name'] == 'title_author', 'English', + ].values[0] + phrases['metadata_title1'] = languages.loc[ + languages['name'] == 'title_series_line1', 'English', + ].values[0] + phrases['metadata_title2'] = languages.loc[ + languages['name'] == 'title_series_line2', 'English', + ].values[0] + phrases['country'] = languages.loc[ + languages['name'] == f'{city} - Country', 'English', + ].values[0] + # restrict to specific language + languages = languages.loc[ + languages['role'] == 'template', ['name', language], + ] + phrases['vernacular'] = languages.loc[ + languages['name'] == 'language', language, + ].values[0] + phrases['city_name'] = languages.loc[ + languages['name'] == city, language, + ].values[0] + phrases['country_name'] = 
languages.loc[ + languages['name'] == f'{city} - Country', language, + ].values[0] + phrases['city'] = config.region['name'] + phrases['study_doi'] = f'https://doi.org/{city_details["DOI"]["Study"]}' + phrases['city_doi'] = f'https://doi.org/{city_details["DOI"][city]}' + phrases['study_executive_names'] = city_details['Names']['Study'] + phrases['local_collaborators_names'] = city_details['Names'][city] + phrases['Image 1 file'] = city_details['Image 1 file'][city] + phrases['Image 2 file'] = city_details['Image 2 file'][city] + phrases['Image 1 credit'] = city_details['Image 1 credit'][city] + phrases['Image 2 credit'] = city_details['Image 2 credit'][city] + phrases['region_population_citation'] = config.region['population'][ + 'citation' + ] + phrases['region_urban_region_citation'] = config.region['urban_region'][ + 'citation' + ] + phrases['region_OpenStreetMap_citation'] = config.region['OpenStreetMap'][ + 'citation' + ] + # incoporating study citations + citation_json = json.loads(city_details['exceptions_json']['Study']) + # handle city-specific exceptions + city_exceptions = json.loads(city_details['exceptions_json'][city]) + if language in city_exceptions: + city_exceptions = json.loads( + city_exceptions[language].replace("'", '"'), + ) + for e in city_exceptions: + phrases[e] = city_exceptions[e].replace('|', '\n') + for citation in citation_json: + if citation != 'citation_doi' or 'citation_doi' not in phrases: + phrases[citation] = ( + citation_json[citation].replace('|', '\n').format(**phrases) + ) + phrases['citation_doi'] = phrases['citation_doi'].format(**phrases) + return phrases + + +def wrap_sentences(words, limit=50, delimiter=''): + """Wrap sentences if exceeding limit.""" + sentences = [] + sentence = '' + gap = len(delimiter) + for i, word in enumerate(words): + if i == 0: + sentence = word + continue + # combine word to sentence if under limit + if len(sentence) + gap + len(word) <= limit: + sentence = sentence + delimiter + word + else: + sentences.append(sentence) + sentence = word + # append the final word if not yet appended + if i == len(words) - 1: + sentences.append(sentence) + # finally, append sentence of all words if still below limit + if (i == len(words) - 1) and (sentences == []): + sentences.append(sentence) + return sentences + + +def prepare_pdf_fonts(pdf, config, language): + """Prepare PDF fonts.""" + fonts = pd.read_excel(config.configuration, sheet_name='fonts') + fonts = ( + fonts.loc[ + fonts['Language'].isin( + ['default', language.replace(' (Auto-translation)', '')], + ) + ] + .fillna('') + .drop_duplicates() + ) + for s in ['', 'B', 'I', 'BI']: + for langue in ['default', language]: + if ( + langue.replace(' (Auto-translation)', '') + in fonts.Language.unique() + ): + f = fonts.loc[ + ( + fonts['Language'] + == langue.replace(' (Auto-translation)', '') + ) + & (fonts['Style'] == s) + ] + if f'{f.Font.values[0]}{s}' not in pdf.fonts.keys(): + pdf.add_font( + f.Font.values[0], style=s, fname=f.File.values[0], + ) + + +def save_pdf_layout(pdf, folder, template, filename): + """Save a PDF report in template subfolder in specified location.""" + if not os.path.exists(folder): + os.mkdir(folder) + template_folder = f'{folder}/_{template} reports' + if not os.path.exists(template_folder): + os.mkdir(template_folder) + pdf.output(f'{template_folder}/{filename}') + return f' _{template} reports/{filename}'.replace('/home/ghsci/work/', '') + + +def generate_scorecard( + config, + phrases, + indicators, + city_policy, + language='English', + 
template='web', + font=None, +): + """ + Format a PDF using the pyfpdf FPDF2 library, and drawing on definitions from a UTF-8 CSV file. + + Included in this function is the marking of a policy 'scorecard', with ticks, crosses, etc. + """ + locale = phrases['locale'] + # Set up PDF document template pages + pages = pdf_template_setup(config, 'template_web', font, language) + pages = format_pages(pages, phrases) + # initialise PDF + pdf = FPDF(orientation='portrait', format='A4', unit='mm') + # set up fonts + prepare_pdf_fonts(pdf, config, language) + pdf.set_author(phrases['metadata_author']) + pdf.set_title(f"{phrases['metadata_title1']} {phrases['metadata_title2']}") + pdf.set_auto_page_break(False) + if template == 'web': + pdf = pdf_for_web( + pdf, + pages, + config, + language, + locale, + phrases, + indicators, + city_policy, + ) + elif template == 'print': + pdf = pdf_for_print( + pdf, + pages, + config, + language, + locale, + phrases, + indicators, + city_policy, + ) + # Output report pdf + filename = f"{phrases['city_name']} - {phrases['title_series_line1'].replace(':','')} - GHSCIC 2022 - {phrases['vernacular']}.pdf" + if phrases['_export'] == 1: + capture_result = save_pdf_layout( + pdf, + folder=config.region['region_dir'], + template=template, + filename=filename, + ) + return capture_result + else: + return 'Skipped.' + + +def pdf_for_web( + pdf, pages, config, language, locale, phrases, indicators, city_policy, +): + """ + Generate a PDF based on a template for web distribution. + + This template includes reporting on both policy and spatial indicators. + """ + city = config.city + city_path = config.region['region_dir'] + figure_path = f'{city_path}/figures' + # Set up Cover page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['1']) + if os.path.exists( + f'{config.folder_path}/process/configuration/assets/{phrases["Image 1 file"]}', + ): + template[ + 'hero_image' + ] = f'{config.folder_path}/process/configuration/assets/{phrases["Image 1 file"]}' + template['hero_alt'] = '' + template['Image 1 credit'] = phrases['Image 1 credit'] + template.render() + # Set up next page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['2']) + template['citations'] = phrases['citations'] + template['study_executive_names'] = phrases['study_executive_names'] + template['local_collaborators'] = template['local_collaborators'].format( + title_city=phrases['title_city'], + ) + template['local_collaborators_names'] = phrases[ + 'local_collaborators_names' + ] + if phrases['translation_names'] is None: + template['translation'] = '' + template['translation_names'] = '' + template.render() + # Set up next page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['3']) + template[ + 'introduction' + ] = f"{phrases['series_intro']}\n\n{phrases['series_interpretation']}".format( + **phrases, + ) + ## Access profile plot + template['access_profile'] = f'{figure_path}/access_profile_{language}.jpg' + ## Walkability plot + template[ + 'all_cities_walkability' + ] = f'{figure_path}/all_cities_walkability_{language}.jpg' + template['walkability_above_median_pct'] = phrases[ + 'walkability_above_median_pct' + ].format( + _pct( + fnum( + indicators['report']['walkability'][ + 'walkability_above_median_pct' + ], + '0.0', + locale, + ), + locale, + ), + ) + ## Policy ratings + template[ + 'presence_rating' + ] = f'{figure_path}/policy_presence_rating_{language}.jpg' + template[ + 'quality_rating' + ] = f'{figure_path}/policy_checklist_rating_{language}.jpg' + 
template['city_header'] = phrases['city_name'] + ## City planning requirement presence (round 0.5 up to 1) + policy_indicators = {0: '✗', 0.5: '~', 1: '✓'} + for x in range(1, 7): + # check presence + template[f'policy_urban_text{x}_response'] = policy_indicators[ + city_policy['Presence'][x - 1] + ] + # format percentage units according to locale + for gdp in ['middle', 'upper']: + template[f'policy_urban_text{x}_{gdp}'] = _pct( + float(city_policy['Presence_gdp'].iloc[x - 1][gdp]), + locale, + length='short', + ) + ## Walkable neighbourhood policy checklist + for i, policy in enumerate(city_policy['Checklist'].index): + row = i + 1 + for j, item in enumerate([x for x in city_policy['Checklist'][i]]): + col = j + 1 + template[f"policy_{'Checklist'}_text{row}_response{col}"] = item + template.render() + # Set up next page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['4']) + ## Density plots + template[ + 'local_nh_population_density' + ] = f'{figure_path}/local_nh_population_density_{language}.jpg' + template[ + 'local_nh_intersection_density' + ] = f'{figure_path}/local_nh_intersection_density_{language}.jpg' + ## Density threshold captions + for scenario in indicators['report']['thresholds']: + template[scenario] = phrases[f'optimal_range - {scenario}'].format( + _pct( + fnum( + indicators['report']['thresholds'][scenario]['pct'], + '0.0', + locale, + ), + locale, + ), + fnum( + indicators['report']['thresholds'][scenario]['criteria'], + '#,000', + locale, + ), + phrases['density_units'], + ) + if os.path.exists( + f'{config.folder_path}/process/configuration/assets/{phrases["Image 2 file"]}', + ): + template[ + 'hero_image_2' + ] = f'{config.folder_path}/process/configuration/assets/{phrases["Image 2 file"]}' + template['hero_alt_2'] = '' + template['Image 2 credit'] = phrases['Image 2 credit'] + template.render() + # Set up next page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['5']) + template[ + 'pct_access_500m_pt.jpg' + ] = f'{figure_path}/pct_access_500m_pt_{language}.jpg' + template[ + 'pct_access_500m_public_open_space_large_score' + ] = f'{figure_path}/pct_access_500m_public_open_space_large_score_{language}.jpg' + template['city_text'] = phrases[f'{city} - Summary'] + ## Checklist ratings for PT and POS + for analysis in ['PT', 'POS']: + for i, policy in enumerate(city_policy[analysis].index): + row = i + 1 + for j, item in enumerate([x for x in city_policy[analysis][i]]): + col = j + 1 + template[f'policy_{analysis}_text{row}_response{col}'] = item + template.render() + # Set up last page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['6']) + template.render() + return pdf From 041690f9bbb317c8095b11334b6e865c93095f78 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 5 Apr 2023 18:35:33 +1000 Subject: [PATCH 21/26] ensured sample points only represent unique point locations (ie. 
only one sample point is retained at junction of multiple roads, not multiple) --- process/subprocesses/_07_locate_origins_destinations.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/process/subprocesses/_07_locate_origins_destinations.py b/process/subprocesses/_07_locate_origins_destinations.py index b4134e85..a3d477bf 100644 --- a/process/subprocesses/_07_locate_origins_destinations.py +++ b/process/subprocesses/_07_locate_origins_destinations.py @@ -48,6 +48,13 @@ def main(): CREATE UNIQUE INDEX IF NOT EXISTS {points}_idx ON {points} (point_id); CREATE INDEX IF NOT EXISTS {points}_geom_idx ON {points} USING GIST (geom); """, + 'Only retain point locations with unique geometries (discard duplicates co-located at junction of edges, retaining only single point)': f""" + DELETE FROM {points} a + USING {points} b + WHERE a.point_id > b.point_id + AND st_equals(a.geom, b.geom) + AND a.geom && b.geom; + """, 'Delete any sampling points which were created within the bounds of areas of open space (ie. along paths through parks)...': f""" DELETE FROM {points} p USING open_space_areas o From a3350d7e236735eba24e512fa7b19f1d2f448364 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 5 Apr 2023 22:58:21 +1000 Subject: [PATCH 22/26] slightly simplified the _11_neighbourhood_analysis.py script (there was some duplicated loading of edges, nodes from gdfs -- so this was consolidated, and some of the variable names simplifed, removing 'gdf' prefix where not really needed) --- process/subprocesses/_11_export_gpkg.py | 71 --------------- .../_11_neighbourhood_analysis.py | 88 +++++++++---------- 2 files changed, 42 insertions(+), 117 deletions(-) delete mode 100644 process/subprocesses/_11_export_gpkg.py diff --git a/process/subprocesses/_11_export_gpkg.py b/process/subprocesses/_11_export_gpkg.py deleted file mode 100644 index 16777cd4..00000000 --- a/process/subprocesses/_11_export_gpkg.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -Export geopackage. - -Export a geopackage from postgis database with relevant layers for further analysis and mapping. 
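(For illustration only: the duplicate-removal rule added in the hunk above keeps a single sample point per unique location by deleting any point whose point_id is higher than that of a co-located point. A minimal GeoPandas sketch of the same idea follows; the toy data, column name and CRS are assumptions for the example and are not part of the patch — WKB equality stands in for ST_Equals, which is adequate for exactly co-located points.)

    import geopandas as gpd
    from shapely.geometry import Point

    # Three sample points; two share identical coordinates, as happens where
    # several network edges meet at the same junction.
    points = gpd.GeoDataFrame(
        {'point_id': [1, 2, 3]},
        geometry=[Point(0, 0), Point(0, 0), Point(100, 100)],
        crs='EPSG:32755',  # hypothetical projected CRS for the example
    )
    # Keep the lowest point_id per unique geometry, analogous to
    # "DELETE ... WHERE a.point_id > b.point_id AND ST_Equals(a.geom, b.geom)".
    points = points.sort_values('point_id')
    unique_points = points[~points.geometry.apply(lambda g: g.wkb).duplicated()]
    print(unique_points['point_id'].tolist())  # [1, 3]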
-""" - -import os -import subprocess as sp - -import geopandas as gpd - -# Set up project and region parameters for GHSCIC analyses -from _project_setup import * -from geoalchemy2 import Geometry, WKTElement -from script_running_log import script_running_log -from sqlalchemy import create_engine, inspect - - -def main(): - # simple timer for log file - start = time.time() - script = os.path.basename(sys.argv[0]) - task = 'Export geopackage' - - engine = create_engine( - 'postgresql://{user}:{pwd}@{host}/{db}'.format( - user=db_user, pwd=db_pwd, host=db_host, db=db, - ), - ) - - tables = [ - 'aos_public_any_nodes_30m_line', - 'aos_public_large_nodes_30m_line', - 'aos_public_osm', - f'{intersections_table}', - 'dest_type', - 'destinations', - 'edges', - 'nodes', - f'{population_grid}', - 'urban_sample_points', - 'urban_study_region', - 'urban_covariates', - ] - if gtfs_feeds is not None: - tables = tables + [gtfs['headway']] - - print('Copying input resource tables to geopackage...'), - - try: - os.remove(gpkg) - except FileNotFoundError: - pass - - for table in tables: - print(f' - {table}') - command = ( - f'ogr2ogr -update -overwrite -lco overwrite=yes -f GPKG {gpkg} ' - f'PG:"host={db_host} user={db_user} dbname={db} password={db_pwd}" ' - f' {table} ' - ) - sp.call(command, shell=True) - print(' Done.') - - # output to completion log - script_running_log(script, task, start, codename) - engine.dispose() - - -if __name__ == '__main__': - main() diff --git a/process/subprocesses/_11_neighbourhood_analysis.py b/process/subprocesses/_11_neighbourhood_analysis.py index 6499b979..af235d0d 100644 --- a/process/subprocesses/_11_neighbourhood_analysis.py +++ b/process/subprocesses/_11_neighbourhood_analysis.py @@ -58,15 +58,24 @@ def main(): db_contents = inspect(engine) with engine.connect() as connection: nodes = gpd.read_postgis('nodes', connection, index_col='osmid') + + nodes.columns = ['geometry' if x == 'geom' else x for x in nodes.columns] + nodes = nodes.set_geometry('geometry') + with engine.connect() as connection: edges = gpd.read_postgis( 'edges_simplified', connection, index_col=['u', 'v', 'key'], ) + + edges.columns = ['geometry' if x == 'geom' else x for x in edges.columns] + edges = edges.set_geometry('geometry') + G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None) with engine.connect() as connection: grid = gpd.read_postgis( population_grid, connection, index_col='grid_id', ) + print( '\nFirst pass node-level neighbourhood analysis (Calculate average population and intersection density ' 'for each intersection node in study regions, taking mean values from distinct grid cells within ' @@ -77,7 +86,7 @@ def main(): if db_contents.has_table('nodes_pop_intersect_density'): print(' - Read population and intersection density from database.') with engine.connect() as connection: - gdf_nodes_simple = gpd.read_postgis( + nodes_simple = gpd.read_postgis( 'nodes_pop_intersect_density', connection, index_col='osmid', @@ -85,25 +94,13 @@ def main(): ) else: print(' - Set up simple nodes') - gdf_nodes = ox.graph_to_gdfs(G_proj, nodes=True, edges=False) - # associate nodes with id - gdf_nodes = spatial_join_index_to_gdf(gdf_nodes, grid, dropna=False) + gdf_nodes = spatial_join_index_to_gdf(nodes, grid, dropna=False) # keep only the unique node id column gdf_nodes = gdf_nodes[['grid_id', 'geometry']] # drop any nodes which are na # (they are outside the buffered study region and not of interest) - gdf_nodes_simple = gdf_nodes[~gdf_nodes.grid_id.isna()].copy() + nodes_simple = 
gdf_nodes[~gdf_nodes.grid_id.isna()].copy() gdf_nodes = gdf_nodes[['grid_id']] - if ( - len( - [ - x - for x in [population_density, intersection_density] - if x not in gdf_nodes_simple.columns - ], - ) - > 0 - ): # Calculate average population and intersection density for each intersection node in study regions # taking mean values from distinct grid cells within neighbourhood buffer distance nh_grid_fields = ['pop_per_sqkm', 'intersections_per_sqkm'] @@ -119,7 +116,7 @@ def main(): # Add a new edge attribute using the integer weights nx.set_edge_attributes(G_proj, weight, 'weight') # run all pairs analysis - total_nodes = len(gdf_nodes_simple) + total_nodes = len(nodes_simple) print( f' - Generate {neighbourhood_distance}m neighbourhoods ' 'for nodes (All pairs Dijkstra shortest path analysis)', @@ -155,18 +152,18 @@ def main(): .values, ) for index, n in tqdm( - np.ndenumerate(gdf_nodes_simple.index.values), + np.ndenumerate(nodes_simple.index.values), total=total_nodes, desc=' ' * 18, ) ], columns=[population_density, intersection_density], - index=gdf_nodes_simple.index.values, + index=nodes_simple.index.values, ) - gdf_nodes_simple = gdf_nodes_simple.join(result) + nodes_simple = nodes_simple.join(result) # save in geopackage (so output files are all kept together) with engine.connect() as connection: - gdf_nodes_simple.to_postgis( + nodes_simple.to_postgis( 'nodes_pop_intersect_density', connection, index='osmid', ) print( @@ -184,9 +181,8 @@ def main(): # living accessibility, populaiton density and intersections population_density; # sum these three zscores at sample point level print('\nCalculate accessibility to points of interest.') - gdf_nodes, gdf_edges = ox.graph_to_gdfs(G_proj) network = create_pdna_net( - gdf_nodes, gdf_edges, predistance=accessibility_distance, + nodes, edges, predistance=accessibility_distance, ) distance_results = {} print('\nCalculating nearest node analyses ...') @@ -228,36 +224,36 @@ def main(): else: # create null results --- e.g. 
for GTFS analyses where no layer exists distance_results[f'{analysis_key}_{layer}'] = pd.DataFrame( - index=gdf_nodes.index, + index=nodes.index, columns=[ f'sp_nearest_node_{x}' for x in analysis['output_names'] ], ) # concatenate analysis dataframes into one - gdf_nodes_poi_dist = pd.concat( - [gdf_nodes] + [distance_results[x] for x in distance_results], axis=1, + nodes_poi_dist = pd.concat( + [nodes] + [distance_results[x] for x in distance_results], axis=1, ) - unnecessary_columns = [ - x - for x in [ - 'geometry', - 'grid_id', - 'lat', - 'lon', - 'y', - 'x', - 'highway', - 'ref', + nodes_poi_dist = nodes_poi_dist[ + [ + x + for x in nodes_poi_dist.columns + if x + not in [ + 'y', + 'x', + 'street_count', + 'lon', + 'lat', + 'ref', + 'highway', + 'geometry', + ] ] - if x in gdf_nodes_poi_dist.columns ] - gdf_nodes_poi_dist.drop( - unnecessary_columns, axis=1, inplace=True, errors='ignore', - ) # replace -999 values (meaning no destination reached in less than 500 metres) as nan - gdf_nodes_poi_dist = ( - round(gdf_nodes_poi_dist, 0).replace(-999, np.nan).astype('Int64') + nodes_poi_dist = ( + round(nodes_poi_dist, 0).replace(-999, np.nan).astype('Int64') ) # read sample points from disk (in city-specific geopackage) with engine.connect() as connection: @@ -267,16 +263,16 @@ def main(): ] sample_points = filter_ids( df=sample_points, - query=f"""n1 in {list(gdf_nodes_simple.index.values)} and n2 in {list(gdf_nodes_simple.index.values)}""", + query=f"""n1 in {list(nodes_simple.index.values)} and n2 in {list(nodes_simple.index.values)}""", message='Restrict sample points to those with two associated sample nodes...', ) sample_points.set_index('point_id', inplace=True) - distance_names = list(gdf_nodes_poi_dist.columns) + distance_names = list(nodes_poi_dist.columns) # Estimate full distance to destinations for sample points full_nodes = create_full_nodes( sample_points, - gdf_nodes_simple, - gdf_nodes_poi_dist, + nodes_simple, + nodes_poi_dist, distance_names, population_density, intersection_density, From 926aa2eb56af72ea9a68ff8a97becd158606355a Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Thu, 6 Apr 2023 00:37:10 +1000 Subject: [PATCH 23/26] re-ensured projected graph was undirected when re-loaded in neighbourhood analyses script --- process/subprocesses/_11_neighbourhood_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/process/subprocesses/_11_neighbourhood_analysis.py b/process/subprocesses/_11_neighbourhood_analysis.py index af235d0d..cbefbb55 100644 --- a/process/subprocesses/_11_neighbourhood_analysis.py +++ b/process/subprocesses/_11_neighbourhood_analysis.py @@ -70,7 +70,7 @@ def main(): edges.columns = ['geometry' if x == 'geom' else x for x in edges.columns] edges = edges.set_geometry('geometry') - G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None) + G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None).to_undirected() with engine.connect() as connection: grid = gpd.read_postgis( population_grid, connection, index_col='grid_id', @@ -126,7 +126,7 @@ def main(): (k, v.keys()) for k, v in tqdm( nx.all_pairs_dijkstra_path_length( - G_proj, chunk_size, 'weight', + G_proj, neighbourhood_distance, 'weight', ), total=total_nodes, unit='nodes', From 12528a06ec85a204dc0b49c9c9f9f78d5cb3a002 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Thu, 6 Apr 2023 13:19:42 +1000 Subject: [PATCH 24/26] re-added in jupyter notebook support (#237) and included directions for tagging with date in the dockerfile (#236) --- docker/Dockerfile | 3 +- 
docker/environment.yml | 1 + docker/requirements.txt | 92 ++++++++++++++++++++++++++++++++++++----- 3 files changed, 85 insertions(+), 11 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 088d2669..ec4e7379 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -9,7 +9,8 @@ # Push to docker hub # docker login # docker tag globalhealthyliveablecities/global-indicators globalhealthyliveablecities/global-indicators:latest -# docker push globalhealthyliveablecities/global-indicators +# docker tag globalhealthyliveablecities/global-indicators globalhealthyliveablecities/global-indicators:yyyy-mm-dd +# docker push --all-tags globalhealthyliveablecities/global-indicators # # Stop/delete all local docker containers/images: # >>> docker stop $(docker ps -aq) diff --git a/docker/environment.yml b/docker/environment.yml index 8a6ca14f..a262eb7a 100644 --- a/docker/environment.yml +++ b/docker/environment.yml @@ -11,6 +11,7 @@ dependencies: - babel - cartopy - contextily + - jupyter - geoalchemy2 - pandana - psycopg2 diff --git a/docker/requirements.txt b/docker/requirements.txt index b80abaaf..cd9137ec 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -1,26 +1,41 @@ affine==2.4.0 +anyio==3.6.2 +argon2-cffi==21.3.0 +argon2-cffi-bindings==21.2.0 +asttokens==2.2.1 attrs==22.2.0 Babel==2.12.1 +backcall==0.2.0 +backports.functools-lru-cache==1.6.4 +beautifulsoup4==4.12.1 +bleach==6.0.0 branca==0.6.0 brotlipy==0.7.0 Cartopy==0.21.1 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==2.1.1 +charset-normalizer==3.1.0 click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 colorama==0.4.6 +comm==0.1.3 contextily==1.3.0 contourpy==1.0.7 cryptography==39.0.2 cycler==0.11.0 dataclasses==0.8 +debugpy==1.6.6 +decorator==5.1.1 defusedxml==0.7.1 +entrypoints==0.4 et-xmlfile==1.1.0 +executing==1.2.0 +fastjsonschema==2.16.3 Fiona==1.9.2 +flit_core==3.8.0 folium==0.14.0 -fonttools==4.39.2 +fonttools==4.39.3 fpdf2==2.6.1 GDAL==3.6.3 GeoAlchemy2==0.13.1 @@ -31,19 +46,42 @@ greenlet==2.0.2 idna==3.4 importlib-metadata==6.1.0 importlib-resources==5.12.0 +ipykernel==6.22.0 +ipython==8.12.0 +ipython-genutils==0.2.0 +ipywidgets==8.0.6 +jedi==0.18.2 Jinja2==3.1.2 joblib==1.2.0 jsonschema==4.17.3 +jupyter==1.0.0 +jupyter_client==8.1.0 +jupyter-console==6.6.3 +jupyter_core==5.3.0 +jupyter-events==0.6.3 +jupyter_server==2.5.0 +jupyter_server_terminals==0.4.4 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.7 kiwisolver==1.4.4 lxml==4.9.2 mapclassify==2.5.0 MarkupSafe==2.1.2 matplotlib==3.7.1 +matplotlib-inline==0.1.6 mercantile==1.2.1 +mistune==2.0.5 munch==2.5.0 munkres==1.1.4 -networkx==3.0 -numexpr==2.8.3 +nbclassic==0.5.5 +nbclient==0.7.3 +nbconvert==7.3.0 +nbformat==5.8.0 +nest-asyncio==1.5.6 +networkx==3.1 +notebook==6.5.3 +notebook_shim==0.2.2 +numexpr==2.8.4 numpy==1.24.2 openpyxl==3.1.1 osmnet==0.1.6 @@ -51,41 +89,75 @@ osmnx==1.3.0 OWSLib==0.28.1 packaging==23.0 pandana==0.6.1 -pandas==1.5.3 -Pillow==9.4.0 +pandas==2.0.0 +pandocfilters==1.5.0 +parso==0.8.3 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==9.5.0 pip==23.0.1 pkgutil_resolve_name==1.3.10 platformdirs==3.2.0 +ply==3.11 pooch==1.7.0 +prometheus-client==0.16.0 +prompt-toolkit==3.0.38 +psutil==5.9.4 psycopg2==2.9.3 +ptyprocess==0.7.0 +pure-eval==0.2.2 pycparser==2.21 pygeometa==0.13.1 -pyOpenSSL==23.1.0 +Pygments==2.14.0 +pyOpenSSL==23.1.1 pyparsing==3.0.9 -pyproj==3.4.1 +pyproj==3.5.0 +PyQt5==5.15.7 +PyQt5-sip==12.11.0 pyrsistent==0.19.3 pyshp==2.3.1 PySocks==1.7.1 python-dateutil==2.8.2 -pytz==2023.2 
+python-json-logger==2.0.7 +pytz==2023.3 PyYAML==6.0 +pyzmq==25.0.2 +qtconsole==5.4.2 +QtPy==2.3.1 rasterio==1.3.6 requests==2.28.2 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 Rtree==1.0.1 scikit-learn==1.2.2 scipy==1.10.1 -setuptools==67.6.0 +Send2Trash==1.8.0 +setuptools==67.6.1 shapely==2.0.1 +sip==6.7.7 six==1.16.0 +sniffio==1.3.0 snuggs==1.4.7 +soupsieve==2.3.2.post1 SQLAlchemy==1.4.46 +stack-data==0.6.2 tables==3.7.0 +terminado==0.17.1 threadpoolctl==3.1.0 +tinycss2==1.2.1 +toml==0.10.2 +tornado==6.2 tqdm==4.65.0 +traitlets==5.9.0 typing_extensions==4.5.0 +tzdata==2023.3 unicodedata2==15.0.0 urbanaccess==0.2.2 urllib3==1.26.15 +wcwidth==0.2.6 +webencodings==0.5.1 +websocket-client==1.5.1 wheel==0.40.0 +widgetsnbextension==4.0.7 xyzservices==2023.2.0 zipp==3.15.0 From b0c5fd80d0c4200d83925bf6725595ef3eded39e Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Tue, 11 Apr 2023 17:09:43 +1000 Subject: [PATCH 25/26] fixed typo (#238) and ensured urban query is used when evaluating study region urban intersection using custom boundary when the urban query and urban region have been otherwise defined (#239) --- .../subprocesses/_01_create_study_region.py | 68 ++++++++++++------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/process/subprocesses/_01_create_study_region.py b/process/subprocesses/_01_create_study_region.py index e41b95c2..646151cb 100644 --- a/process/subprocesses/_01_create_study_region.py +++ b/process/subprocesses/_01_create_study_region.py @@ -51,14 +51,14 @@ def main(): boundary_data = urban_region['data_dir'] query = f""" -where "{urban_query.split(':')[1]}" """ if '=' not in query: - raise ( + raise Exception( """ The urban area configured for the study region was indicated, however the query wasn't understood (should be in format "GHS:field=value", e.g. "GHS:UC_NM_MN=Baltimore, or (even better; more specific) "GHS:UC_NM_MN='Baltimore' and CTR_MN_NM=='United States'" - """ + """, ) elif '.gpkg:' in area_data: gpkg = area_data.split(':') @@ -67,7 +67,6 @@ def main(): else: boundary_data = area_data query = '' - command = ( ' ogr2ogr -overwrite -progress -f "PostgreSQL" ' f' PG:"host={db_host} port={db_port} dbname={db}' @@ -105,21 +104,41 @@ def main(): with engine.begin() as connection: connection.execute(text(sql)) else: - # get study region bounding box to be used to retrieve intersecting urban geometries - sql = """ - SELECT - ST_Xmin(geom) xmin, - ST_Ymin(geom) ymin, - ST_Xmax(geom) xmax, - ST_Ymax(geom) ymax - FROM "study_region_boundary"; - """ - with engine.begin() as connection: - result = connection.execute(text(sql)) - bbox = ' '.join( - [str(coord) for coord in [coords for coords in result][0]], - ) - + # Global Human Settlements urban area is used to define this study region + if urban_query is not None: + if '=' not in urban_query: + raise Exception( + """ + The urban area configured for the study region was indicated, + however the query wasn't understood + (should be in format "GHS:field=value", + e.g. 
"GHS:UC_NM_MN=Baltimore, or (even better; more specific) + "GHS:UC_NM_MN='Baltimore' and CTR_MN_NM=='United States'" + """, + ) + else: + query = f""" -where "{urban_query.split(':')[1]}" """ + additional_sql = '' + else: + # get study region bounding box to be used to retrieve intersecting urban geometries + sql = """ + SELECT + ST_Xmin(geom) xmin, + ST_Ymin(geom) ymin, + ST_Xmax(geom) xmax, + ST_Ymax(geom) ymax + FROM "study_region_boundary"; + """ + with engine.begin() as connection: + result = connection.execute(text(sql)) + bbox = ' '.join( + [str(coord) for coord in [coords for coords in result][0]], + ) + query = f' -spat {bbox} -spat_srs {crs_srid}' + additional_sql = """ + ,"study_region_boundary" b + WHERE ST_Intersects(ST_Union(a.geom),ST_Union(b.geom)) + """ command = ( ' ogr2ogr -overwrite -progress -f "PostgreSQL" ' f' PG:"host={db_host} port={db_port} dbname={db}' @@ -127,7 +146,7 @@ def main(): f' "{urban_region["data_dir"]}" ' f' -lco geometry_name="geom" -lco precision=NO ' f' -t_srs {crs_srid} -nln full_urban_region ' - f' -spat {bbox} -spat_srs {crs_srid} ' + f' {query} ' ) print(command) sp.call(command, shell=True) @@ -137,10 +156,13 @@ def main(): '{db}'::text AS "db", ST_Area(a.geom)/10^6 AS area_sqkm, a.geom - FROM full_urban_region a, - "study_region_boundary" b - WHERE ST_Intersects(a.geom,b.geom); + FROM full_urban_region a + {additional_sql}; CREATE INDEX IF NOT EXISTS urban_region_gix ON urban_region USING GIST (geom); + """ + with engine.begin() as connection: + connection.execute(text(sql)) + sql = """ CREATE TABLE IF NOT EXISTS urban_study_region AS SELECT b."study_region", b."db", @@ -148,7 +170,7 @@ def main(): ST_Union(ST_Intersection(a.geom,b.geom)) geom FROM "study_region_boundary" a, urban_region b - GROUP BY b."study_region_boundary", b."db"; + GROUP BY b."study_region", b."db"; CREATE INDEX IF NOT EXISTS urban_study_region_gix ON urban_study_region USING GIST (geom); """ with engine.begin() as connection: From f15aef759f981c22c9af3062217040912f7c31b1 Mon Sep 17 00:00:00 2001 From: Carl Higgs Date: Wed, 12 Apr 2023 17:52:58 +1000 Subject: [PATCH 26/26] reverted to using 'length' (float) for neighbourhood cost rather than 'weight' (integer; derived from length) as per #243, as the additional lines of code appear redundant (no tangible time benefit at a negligible cost in precision, not justifying the additional complexity) --- process/subprocesses/_11_neighbourhood_analysis.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/process/subprocesses/_11_neighbourhood_analysis.py b/process/subprocesses/_11_neighbourhood_analysis.py index cbefbb55..fbb7580b 100644 --- a/process/subprocesses/_11_neighbourhood_analysis.py +++ b/process/subprocesses/_11_neighbourhood_analysis.py @@ -104,17 +104,6 @@ def main(): # Calculate average population and intersection density for each intersection node in study regions # taking mean values from distinct grid cells within neighbourhood buffer distance nh_grid_fields = ['pop_per_sqkm', 'intersections_per_sqkm'] - # Create a dictionary of edge index and integer values of length - # The length attribute was saved as string, so must be recast to use as weight - # The units are meters, so the decimal precision is unnecessary (error is larger than this; meter is adequate) - weight = dict( - zip( - [k for k in G_proj.edges], - [int(float(G_proj.edges[k]['length'])) for k in G_proj.edges], - ), - ) - # Add a new edge attribute using the integer weights - nx.set_edge_attributes(G_proj, weight, 
'weight') # run all pairs analysis total_nodes = len(nodes_simple) print( @@ -126,7 +115,7 @@ def main(): (k, v.keys()) for k, v in tqdm( nx.all_pairs_dijkstra_path_length( - G_proj, neighbourhood_distance, 'weight', + G_proj, neighbourhood_distance, 'length', ), total=total_nodes, unit='nodes',
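(For illustration only: the reverted call pattern passes the neighbourhood distance as the Dijkstra cutoff and uses the float 'length' edge attribute directly as the cost, with no integer re-weighting step. A minimal networkx sketch follows; the node names, edge lengths and cutoff are assumptions for the example and are not part of the patch.)

    import networkx as nx

    # Toy undirected graph with float edge lengths in metres.
    G = nx.Graph()
    G.add_edge('a', 'b', length=120.4)
    G.add_edge('b', 'c', length=380.9)
    G.add_edge('c', 'd', length=950.0)

    neighbourhood_distance = 500  # cutoff in metres (example value)
    # all_pairs_dijkstra_path_length(G, cutoff, weight) yields, for each node,
    # the nodes reachable within the cutoff using 'length' as the edge cost.
    for node, reachable in nx.all_pairs_dijkstra_path_length(
        G, neighbourhood_distance, 'length',
    ):
        print(node, sorted(reachable))
    # 'a' reaches only {'a', 'b'}: a-b is 120.4 m, a-b-c totals 501.3 m (> 500)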