diff --git a/docker/Dockerfile b/docker/Dockerfile index 713d361a..ec4e7379 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -9,7 +9,8 @@ # Push to docker hub # docker login # docker tag globalhealthyliveablecities/global-indicators globalhealthyliveablecities/global-indicators:latest -# docker push globalhealthyliveablecities/global-indicators +# docker tag globalhealthyliveablecities/global-indicators globalhealthyliveablecities/global-indicators:yyyy-mm-dd +# docker push --all-tags globalhealthyliveablecities/global-indicators # # Stop/delete all local docker containers/images: # >>> docker stop $(docker ps -aq) @@ -17,7 +18,7 @@ # >>> docker rmi $(docker images -q) --force ############################################################################## -FROM --platform=$BUILDPLATFORM continuumio/miniconda3:4.10.3-alpine as build +FROM continuumio/miniconda3:4.10.3-alpine as build LABEL maintainer="Global Healthy Liveable City Indicator Study Collaboration Group" LABEL url="https://github.com/global-healthy-liveable-cities/global-indicators" diff --git a/docker/environment.yml b/docker/environment.yml index 8cecea17..a262eb7a 100644 --- a/docker/environment.yml +++ b/docker/environment.yml @@ -11,6 +11,7 @@ dependencies: - babel - cartopy - contextily + - jupyter - geoalchemy2 - pandana - psycopg2 @@ -18,3 +19,4 @@ dependencies: - rasterio - urbanaccess - tqdm + - pygeometa diff --git a/docker/requirements.txt b/docker/requirements.txt index 3aa09609..cd9137ec 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -1,25 +1,41 @@ affine==2.4.0 +anyio==3.6.2 +argon2-cffi==21.3.0 +argon2-cffi-bindings==21.2.0 +asttokens==2.2.1 attrs==22.2.0 Babel==2.12.1 +backcall==0.2.0 +backports.functools-lru-cache==1.6.4 +beautifulsoup4==4.12.1 +bleach==6.0.0 branca==0.6.0 brotlipy==0.7.0 Cartopy==0.21.1 certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==2.1.1 +charset-normalizer==3.1.0 click==8.1.3 click-plugins==1.1.1 cligj==0.7.2 colorama==0.4.6 +comm==0.1.3 contextily==1.3.0 contourpy==1.0.7 cryptography==39.0.2 cycler==0.11.0 +dataclasses==0.8 +debugpy==1.6.6 +decorator==5.1.1 defusedxml==0.7.1 +entrypoints==0.4 et-xmlfile==1.1.0 +executing==1.2.0 +fastjsonschema==2.16.3 Fiona==1.9.2 +flit_core==3.8.0 folium==0.14.0 -fonttools==4.39.2 +fonttools==4.39.3 fpdf2==2.6.1 GDAL==3.6.3 GeoAlchemy2==0.13.1 @@ -29,55 +45,119 @@ geopy==2.3.0 greenlet==2.0.2 idna==3.4 importlib-metadata==6.1.0 +importlib-resources==5.12.0 +ipykernel==6.22.0 +ipython==8.12.0 +ipython-genutils==0.2.0 +ipywidgets==8.0.6 +jedi==0.18.2 Jinja2==3.1.2 joblib==1.2.0 +jsonschema==4.17.3 +jupyter==1.0.0 +jupyter_client==8.1.0 +jupyter-console==6.6.3 +jupyter_core==5.3.0 +jupyter-events==0.6.3 +jupyter_server==2.5.0 +jupyter_server_terminals==0.4.4 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.7 kiwisolver==1.4.4 +lxml==4.9.2 mapclassify==2.5.0 MarkupSafe==2.1.2 matplotlib==3.7.1 +matplotlib-inline==0.1.6 mercantile==1.2.1 +mistune==2.0.5 munch==2.5.0 munkres==1.1.4 -networkx==3.0 -numexpr==2.8.3 +nbclassic==0.5.5 +nbclient==0.7.3 +nbconvert==7.3.0 +nbformat==5.8.0 +nest-asyncio==1.5.6 +networkx==3.1 +notebook==6.5.3 +notebook_shim==0.2.2 +numexpr==2.8.4 numpy==1.24.2 openpyxl==3.1.1 osmnet==0.1.6 osmnx==1.3.0 +OWSLib==0.28.1 packaging==23.0 pandana==0.6.1 -pandas==1.5.3 -Pillow==9.4.0 +pandas==2.0.0 +pandocfilters==1.5.0 +parso==0.8.3 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==9.5.0 pip==23.0.1 -platformdirs==3.1.1 +pkgutil_resolve_name==1.3.10 +platformdirs==3.2.0 +ply==3.11 pooch==1.7.0 
+prometheus-client==0.16.0 +prompt-toolkit==3.0.38 +psutil==5.9.4 psycopg2==2.9.3 +ptyprocess==0.7.0 +pure-eval==0.2.2 pycparser==2.21 -pyOpenSSL==23.0.0 +pygeometa==0.13.1 +Pygments==2.14.0 +pyOpenSSL==23.1.1 pyparsing==3.0.9 -pyproj==3.4.1 +pyproj==3.5.0 +PyQt5==5.15.7 +PyQt5-sip==12.11.0 +pyrsistent==0.19.3 pyshp==2.3.1 PySocks==1.7.1 python-dateutil==2.8.2 -pytz==2022.7.1 +python-json-logger==2.0.7 +pytz==2023.3 PyYAML==6.0 +pyzmq==25.0.2 +qtconsole==5.4.2 +QtPy==2.3.1 rasterio==1.3.6 requests==2.28.2 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 Rtree==1.0.1 scikit-learn==1.2.2 scipy==1.10.1 -setuptools==67.6.0 +Send2Trash==1.8.0 +setuptools==67.6.1 shapely==2.0.1 +sip==6.7.7 six==1.16.0 +sniffio==1.3.0 snuggs==1.4.7 +soupsieve==2.3.2.post1 SQLAlchemy==1.4.46 +stack-data==0.6.2 tables==3.7.0 +terminado==0.17.1 threadpoolctl==3.1.0 +tinycss2==1.2.1 +toml==0.10.2 +tornado==6.2 tqdm==4.65.0 +traitlets==5.9.0 typing_extensions==4.5.0 +tzdata==2023.3 unicodedata2==15.0.0 urbanaccess==0.2.2 urllib3==1.26.15 +wcwidth==0.2.6 +webencodings==0.5.1 +websocket-client==1.5.1 wheel==0.40.0 +widgetsnbextension==4.0.7 xyzservices==2023.2.0 zipp==3.15.0 diff --git a/process/1_create_project_configuration_files.py b/process/1_create_project_configuration_files.py index c305c2c3..3b2fa5ee 100644 --- a/process/1_create_project_configuration_files.py +++ b/process/1_create_project_configuration_files.py @@ -8,6 +8,8 @@ import shutil import sys +from subprocesses._utils import get_terminal_columns, print_autobreak + source_folder = './configuration/templates' dest_folder = './configuration' region_names = [ @@ -55,7 +57,7 @@ notes: Notes for this region """ -print( +print_autobreak( 'Creating project configuration files, if not already existing in the configuration folder...', ) try: @@ -73,17 +75,18 @@ if len(sys.argv) >= 2: codename = sys.argv[1] if os.path.exists(f'./configuration/regions/{codename}.yml'): - print( + print_autobreak( f"\nConfiguration file for the specified study region codename '{codename}' already exists: \nconfiguration/regions/{codename}.yml.\n\nPlease open and edit this file in a text editor following the provided example directions in order to complete configuration for your study region. A completed example study region configuration can be viewed in the file 'configuration/regions/example_ES_Las_Palmas_2023.yml'.\n\nTo view additional guidance on configuration, run this script again without a codename. Once configuration has been completed, to proceed to analysis for this city, enter:\npython 2_analyse_region.py {codename}\n", ) else: with open(f'./configuration/regions/{codename}.yml', 'w') as f: f.write(region_template) - print( + print_autobreak( f"\nNew region configuration file has been initialised using the codename, '{codename}', in the folder:\nconfiguration/regions/{codename}.yml\n\nPlease open and edit this file in a text editor following the provided example directions in order to complete configuration for your study region. A completed example study region configuration can be viewed in the file 'configuration/regions/example_ES_Las_Palmas_2023.yml'.\n\nTo view additional guidance on configuration, run this script again without a codename.\n", ) else: - print( + list_seperation = '\n ' + print_autobreak( f""" Before commencing analysis, your study regions will need to be configured. @@ -102,7 +105,7 @@ Optional configuration of other parameters is also possible. 
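The print_autobreak and get_terminal_columns helpers imported above come from process/subprocesses/_utils.py; they wrap long console messages to the width of the user's terminal. A minimal sketch of what such helpers might look like (a hypothetical implementation inferred from how they are called in this patch, not the project's actual code):

    import os
    import textwrap

    def get_terminal_columns(default=80):
        """Return the terminal width, falling back to a default when it cannot be determined (e.g. piped output)."""
        try:
            return os.get_terminal_size().columns
        except OSError:
            return default

    def print_autobreak(*args, sep=' '):
        """Print a message wrapped to the terminal width, preserving explicit newlines and blank lines."""
        width = get_terminal_columns()
        for line in sep.join(str(a) for a in args).split('\n'):
            print(textwrap.fill(line, width=width) if line.strip() else line)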
Please visit our tool's website for further guidance on how to use this tool: https://global-healthy-liveable-cities.github.io/ -Currently configured study regions : {', '.join(region_names)} +Currently configured study regions : {list_seperation}{list_seperation.join(region_names)} To initialise a new study region configuration file, you can run this script again providing your choice of codename, for example: diff --git a/process/2_analyse_region.py b/process/2_analyse_region.py index 74f5dda3..9e1736ae 100644 --- a/process/2_analyse_region.py +++ b/process/2_analyse_region.py @@ -25,6 +25,7 @@ time, version, ) +from subprocesses._utils import get_terminal_columns, print_autobreak from tqdm.auto import tqdm # Create study region folder if not exists @@ -55,8 +56,8 @@ current_parameters['project'] == saved_parameters['project'] and current_parameters[codename] == saved_parameters[codename] ): - print( - f"The saved copy of region and project parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} at {region_dir}/_parameters_{saved_parameters['date']}.yml matches the current configuration parameters and will be retained.\n\n", + print_autobreak( + f"The copy of region and project parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} saved in the output directory as _parameters_{saved_parameters['date']}.yml matches the current configuration parameters and will be retained.\n\n", ) else: shutil.copyfile( @@ -72,8 +73,8 @@ sort_keys=False, width=float('inf'), ) - print( - f"Project or region parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} appear to have been modified. The previous parameter record file has been copied to {region_dir}/_parameters_{saved_parameters['date']}.yml, while the current ones have been saved as {region_dir}/_parameters.yml.\n\n", + print_autobreak( + f"Project or region parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} appear to have been modified. 
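The block above compares the project and region sections of the current configuration against the dated _parameters_*.yml snapshot saved with a previous analysis. A condensed sketch of that check (the function name and use of yaml.safe_load are illustrative assumptions; the script performs the comparison inline):

    import yaml

    def parameters_unchanged(saved_path, current_parameters, codename):
        """Return True if a saved parameter snapshot still matches the current project and region configuration."""
        with open(saved_path) as f:
            saved = yaml.safe_load(f)
        return (
            saved['project'] == current_parameters['project']
            and saved[codename] == current_parameters[codename]
        )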
The previous parameter record file has been copied to the output directory as _parameters_{saved_parameters['date']}.yml, while the current ones have been saved as _parameters.yml.\n", ) else: with open(f'{region_dir}/_parameters.yml', 'w') as f: @@ -85,15 +86,17 @@ sort_keys=False, width=float('inf'), ) - print( - f'A dated copy of project and region parameters has been saved as {region_dir}/_parameters.yml.\n\n', + print_autobreak( + f'A dated copy of project and region parameters has been saved as {region_dir}/_parameters.yml.'.replace( + '/home/ghsci/work/', '', + ), ) -print( - f'Analysis time zone: {analysis_timezone} (to set time zone for where you are, edit config.yml)', +print_autobreak( + f'\nAnalysis time zone: {analysis_timezone} (to set time zone for where you are, edit config.yml)\n\n', ) start_analysis = time.time() -print(f"Analysis start: {time.strftime('%Y-%m-%d_%H%M')}") +print(f"Analysis start:\t{time.strftime('%Y-%m-%d_%H%M')}") study_region_setup = { '_00_create_database.py': 'Create database', '_01_create_study_region.py': 'Create study region', @@ -106,9 +109,8 @@ '_08_destination_summary.py': 'Summarise spatial distribution', '_09_urban_covariates.py': 'Collate urban covariates', '_10_gtfs_analysis.py': 'Analyse GTFS Feeds', - '_11_export_gpkg.py': 'Export geopackage', - '_12_neighbourhood_analysis.py': 'Analyse neighbourhoods', - '_13_aggregation.py': 'Aggregate region summary analyses', + '_11_neighbourhood_analysis.py': 'Analyse neighbourhoods', + '_12_aggregation.py': 'Aggregate region summary analyses', } pbar = tqdm( study_region_setup, @@ -130,11 +132,11 @@ stdout=append_to_log_file, ) except Exception as e: - print( + print_autobreak( f'\n\nProcessing {step} failed: {e}\n\n Please review the processing log file for this study region for more information on what caused this error and how to resolve it. The file __{name}__{codename}_processing_log.txt is located in the output directory and may be opened for viewing in a text editor.', ) finally: duration = (time.time() - start_analysis) / 60 print( - f'{time.strftime("%Y-%m-%d_%H%M")} (analysis duration of approximately {duration:.1f} minutes)\n\nOnce the setup of study region resources has been successfully completed, we encourage you to inspect the region-specific resources located in the output directory (e.g. text log file, geopackage output, csv files, PDF report and image files).', + f'Analysis end:\t{time.strftime("%Y-%m-%d_%H%M")} (approximately {duration:.1f} minutes)\n', ) diff --git a/process/3_generate_resources.py b/process/3_generate_resources.py index 187c1219..8c0534f6 100644 --- a/process/3_generate_resources.py +++ b/process/3_generate_resources.py @@ -1,21 +1,40 @@ -""" -Global scorecards. - -Format and save indicator reports. 
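The analysis loop in 2_analyse_region.py above iterates over the numbered subprocess scripts with a tqdm progress bar and appends each step's output to the study region's processing log, so errors can be reviewed after the fact. A simplified sketch of that pattern (the command string, log file name and step list here are illustrative assumptions only):

    import subprocess as sp

    from tqdm import tqdm

    codename = 'example_ES_Las_Palmas_2023'  # hypothetical study region codename
    log_path = f'__Example__{codename}_processing_log.txt'  # hypothetical log file name
    steps = {
        '_00_create_database.py': 'Create database',
        '_01_create_study_region.py': 'Create study region',
    }
    with open(log_path, 'a') as log:
        for script, description in tqdm(steps.items(), desc=codename):
            exit_code = sp.call(
                f'python subprocesses/{script} {codename}',  # assumed invocation form
                shell=True,
                stdout=log,
                stderr=sp.STDOUT,
            )
            if exit_code != 0:
                raise RuntimeError(f'{description} failed; see {log_path} for details')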
-""" +"""Generate resources supporting urban indicator dissemination and usage.""" import os +import shutil import sys -# import and set up functions -import subprocesses._report_functions as _report_functions +from sqlalchemy import create_engine from subprocesses._project_setup import ( + authors, codename, + db, + db_host, + db_pwd, + db_user, + email, folder_path, + gtfs, indicators, + individualname, + name, policies, + positionname, region_config, region_names, + time, + url, + year, +) +from subprocesses._utils import ( + generate_metadata_xml, + generate_metadata_yml, + generate_report_for_language, + get_and_setup_language_cities, + get_terminal_columns, + postgis_to_csv, + postgis_to_geopackage, + print_autobreak, ) @@ -46,15 +65,89 @@ class config: def main(): - languages = _report_functions.get_and_setup_language_cities(config) - if languages == []: - sys.exit( - '\nReport generation failed (no language configured for this city). Please confirm that city and its corresponding codename have been configured in the city details and language worksheets of configuration/_report_configuration.xlsx.\n\n', + engine = create_engine( + f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', future=True, + ) + # List existing generated resources + print('Analysis parameter summary text file') + print(f' {codename}.yml') + print('\nAnalysis log text file') + print(f" __{region_config['name']}__{codename}_processing_log.txt") + print('\nData files') + print(f" {os.path.basename(region_config['gpkg'])}") + tables = [ + region_config['city_summary'], + region_config['grid_summary'], + region_config['point_summary'], + 'aos_public_osm', + 'dest_type', + 'destinations', + region_config['intersections_table'], + 'edges', + 'nodes', + ] + if region_config['gtfs_feeds'] is not None: + tables = tables + [gtfs['headway']] + postgis_to_geopackage( + region_config['gpkg'], db_host, db_user, db, db_pwd, tables, + ) + for layer in ['city', 'grid']: + print( + postgis_to_csv( + f" {region_config[f'{layer}_summary']}.csv", + db_host, + db_user, + db, + db_pwd, + region_config[f'{layer}_summary'], + ), ) - for language in languages: - _report_functions.generate_report_for_language( - config, language, indicators, policies, + # Generate data dictionary + print('\nData dictionaries') + required_assets = [ + 'output_data_dictionary.csv', + 'output_data_dictionary.xlsx', + ] + for file in required_assets: + shutil.copyfile( + f'./configuration/assets/{file}', + f"{config.region['region_dir']}/{file}", ) + print(f' {file}') + + # Generate metadata + print('\nMetadata') + metadata_yml = generate_metadata_yml( + engine, + folder_path, + region_config, + codename, + name, + year, + authors, + url, + individualname, + positionname, + email, + ) + print(f' {metadata_yml}') + metadata_xml = generate_metadata_xml(config.region['region_dir'], codename) + print(f' {metadata_xml}') + + # Generate reports + languages = get_and_setup_language_cities(config) + if languages == []: + print_autobreak( + ' - Report generation skippped. 
Please confirm that city and its corresponding codename have been configured in the city details and language worksheets of configuration/_report_configuration.xlsx.', + ) + else: + for language in languages: + generate_report_for_language( + config, language, indicators, policies, + ) + print_autobreak( + '\n\nIt is important to take the time to familiarise yourself with the various outputs generated from the configuration and analysis of your region of interest to ensure they provide a fair and accurate representation given local knowledge. Any issues or limitations identified should be understood and can be iteratively addressed and/or acknowledged in documentation prior to dissemination.\n\n', + ) if __name__ == '__main__': diff --git a/process/configuration/assets/metadata_template.yml b/process/configuration/assets/metadata_template.yml new file mode 100644 index 00000000..d642b12c --- /dev/null +++ b/process/configuration/assets/metadata_template.yml @@ -0,0 +1,233 @@ +mcf: + version: 1.0 + +metadata: + identifier: + language: en + charset: utf8 + hierarchylevel: dataset + datestamp: {datestamp} + dataseturi: + +spatial: + datatype: vector + geomtype: polygon + +identification: + language: eng + charset: utf8 + title: + en: Spatial urban indicator data for {name} + abstract: + en: A set of spatial urban indicators for {name}, derived using the Global Healthy & Sustainable City Indicators tool (https://global-healthy-liveable-cities.github.io/). + dates: + creation: {datestamp} + keywords: + default: + keywords: + en: [urban planning, built environment, health, policy indicators, spatial indicators] + topiccategory: + - Urban analysis and development + extents: + spatial: + - bbox: {spatial_bbox} + crs: {spatial_crs} + temporal: + - begin: {year} + end: {year} + resolution: P1Y + license: + name: ODbL + url: https://opendatacommons.org/licenses/odbl/1-0/ + rights: + en: Copyright (c) {dateyear} + url: https://healthysustainablecities.org + +contact: + pointOfContact: &contact_poc + organization: {authors} + url: {url} + individualname: {individualname} + positionname: {positionname} + email: {email} + contactinstructions: email + + distributor: *contact_poc + +distribution: + GOHSC: + url: https://healthysustainablecities.org + +acquisition: + note: see statement + +dataquality: + scope: + level: dataset + lineage: + statement: | + This dataset was derived using the Global Healthy & Sustainable City Indicators tool (https://global-healthy-liveable-cities.github.io/) developed to support the Global Observatory of Healthy and Sustainable Cities (https://healthysustainablcities.org)/. 
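This template is a pygeometa metadata control file (MCF); generate_metadata_yml fills in the {placeholders} for a study region and generate_metadata_xml renders an XML record from the result. The general pygeometa workflow looks like the sketch below (file names are illustrative, and the schema used by generate_metadata_xml may differ from the ISO 19139 schema shown here):

    from pygeometa.core import read_mcf
    from pygeometa.schemas.iso19139 import ISO19139OutputSchema

    # Render an ISO 19139 XML record from a completed metadata control file
    mcf = read_mcf('example_ES_Las_Palmas_2023_metadata.yml')  # hypothetical filled-in copy of this template
    xml_record = ISO19139OutputSchema().write(mcf)
    with open('example_ES_Las_Palmas_2023_metadata.xml', 'w') as f:
        f.write(xml_record)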
The analysis was configured as follows:\n\n{region_config} + +attributes: + - Category: Study region information + Description: Continent + Scale: city + Variable: Continent + - Category: Study region information + Description: Country + Scale: city + Variable: Country + - Category: Study region information + Description: 2-letter country code + Scale: city + Variable: ISO 3166-1 alpha-2 + - Category: Study region information + Description: Study region + Scale: 'city grid' + Variable: City + - Category: Derived study region statistics + Description: "Area (km\xB2; accounting for urban restrictions if applied)" + Scale: 'city grid' + Variable: Area (sqkm) + - Category: Derived study region statistics + Description: 'Population estimate + as per configured population data source' + Scale: 'city grid' + Variable: Population estimate + - Category: Derived study region statistics + Description: "Population per km\xB2" + Scale: 'city grid' + Variable: Population per sqkm + - Category: Derived study region statistics + Description: Intersection count (following consolidation based on intersection tolerance parameter in region configuration) + Scale: 'city grid' + Variable: Intersections + - Category: Derived study region statistics + Description: "Intersections per km\xB2" + Scale: 'city grid' + Variable: Intersections per sqkm + - Category: Linked covariates + Description: 'Total emission of CO 2 from the transport sector using non-short-cycle-organic fuels in 2015' + Scale: city + Variable: E_EC2E_T15 + - Category: Linked covariates + Description: 'Total emission of CO 2 from the energy sector + using short-cycle-organic fuels in 2015' + Scale: city + Variable: E_EC2O_T15 + - Category: Linked covariates + Description: Total emission of PM 2.5 from the transport sector in 2015 + Scale: city + Variable: E_EPM2_T15 + - Category: Linked covariates + Description: Total concertation of PM 2.5 for reference epoch 2014 + Scale: city + Variable: E_CPM2_T14 + - Category: Analytical statistic + Description: Sample points used in this analysis (generated along pedestrian network for populated grid areas) + Scale: 'city grid' + Variable: urban_sample_point_count + - Category: Indicator estimates + Description: 'Score (/1) for access within 500 m to a fresh food market / supermarket (source: OpenStreetMap or custom)' + Scale: grid + Variable: access_500m_fresh_food_market_score + - Category: Indicator estimates + Description: 'Score (/1) for access within 500 m to a convenience store (source: OpenStreetMap or custom)' + Scale: grid + Variable: access_500m_convenience_score + - Category: Indicator estimates + Description: 'Score (/1) for access within 500 m to a public transport (source: OpenStreetMap or custom)' + Scale: grid + Variable: access_500m_pt_osm_any_score + - Category: Indicator estimates + Description: 'Score (/1) for access within 500 m to a any public open space (source: OpenStreetMap)' + Scale: grid + Variable: access_500m_public_open_space_any_score + - Category: Indicator estimates + Description: 'Score (/1) for access within 500 m to a public open space larger than 1.5 hectares (source: OpenStreetMap)' + Scale: grid + Variable: access_500m_public_open_space_large_score + - Category: Indicator estimates + Description: 'Score (/1) for access within 500 m to a public transport (source: GTFS)' + Scale: grid + Variable: access_500m_pt_gtfs_any_score + - Category: Indicator estimates + Description: 'Score (/1) for access within 500 m to a public transport with average daytime weekday service 
frequency of 30 minutes or better (source: GTFS)' + Scale: grid + Variable: access_500m_pt_gtfs_freq_30_score + - Category: Indicator estimates + Description: 'Score (/1) for access within 500 m to a public transport with average daytime weekday service frequency of 20 minutes or better (source: GTFS)' + Scale: grid + Variable: access_500m_pt_gtfs_freq_20_score + - Category: Indicator estimates + Description: 'Score (/1) for access within 500 m to a any public transport stop (source: GTFS or OpenStreetMap/custom)' + Scale: grid + Variable: access_500m_pt_any_score + - Category: Indicator estimates + Description: 'Percentage of population with access within 500 m to a fresh food market / supermarket (source: OpenStreetMap or custom)' + Scale: city + Variable: pop_pct_access_500m_fresh_food_market_score + - Category: Indicator estimates + Description: 'Percentage of population with access within 500 m to a convenience store (source: OpenStreetMap or custom)' + Scale: city + Variable: pop_pct_access_500m_convenience_score + - Category: Indicator estimates + Description: 'Percentage of population with access within 500 m to a public transport (source: OpenStreetMap or custom)' + Scale: city + Variable: pop_pct_access_500m_pt_osm_any_score + - Category: Indicator estimates + Description: 'Percentage of population with access within 500 m to a any public open space (source: OpenStreetMap)' + Scale: city + Variable: pop_pct_access_500m_public_open_space_any_score + - Category: Indicator estimates + Description: 'Percentage of population with access within 500 m to a public open space larger than 1.5 hectares (source: OpenStreetMap)' + Scale: city + Variable: pop_pct_access_500m_public_open_space_large_score + - Category: Indicator estimates + Description: 'Percentage of population with access within 500 m to a public transport (source: GTFS)' + Scale: city + Variable: pop_pct_access_500m_pt_gtfs_any_score + - Category: Indicator estimates + Description: 'Percentage of population with access within 500 m to a public transport with average daytime weekday service frequency of 30 minutes or better (source: GTFS)' + Scale: city + Variable: pop_pct_access_500m_pt_gtfs_freq_30_score + - Category: Indicator estimates + Description: 'Percentage of population with access within 500 m to a public transport with average daytime weekday service frequency of 20 minutes or better (source: GTFS)' + Scale: city + Variable: pop_pct_access_500m_pt_gtfs_freq_20_score + - Category: Indicator estimates + Description: 'Percentage of population with access within 500 m to a any public transport stop (source: GTFS or OpenStreetMap/custom)' + Scale: city + Variable: pop_pct_access_500m_pt_any_score + - Category: Indicator estimates + Description: Average walkable neighbourhood poulation density (population weighted) * + Scale: city + Variable: pop_nh_pop_density + - Category: Indicator estimates + Description: Average walkable neighbourhood intersection density (population weighted) * + Scale: city + Variable: pop_nh_intersection_density + - Category: Indicator estimates + Description: Average daily living score (population weighted) * + Scale: city + Variable: pop_daily_living + - Category: Indicator estimates + Description: Average walkability (population weighted) * + Scale: city + Variable: pop_walkability + - Category: Indicator estimates + Description: 'Average walkable neighbourhood poulation density' + Scale: 'city grid' + Variable: local_nh_population_density + - Category: Indicator estimates + Description: 
'Average walkable neighbourhood intersection density' + Scale: 'city grid' + Variable: local_nh_intersection_density + - Category: Indicator estimates + Description: 'Average daily living score ' + Scale: 'city grid' + Variable: local_daily_living + - Category: Indicator estimates + Description: 'Average walkability ' + Scale: 'city grid' + Variable: local_walkability diff --git a/process/configuration/assets/output_data_dictionary.csv b/process/configuration/assets/output_data_dictionary.csv new file mode 100644 index 00000000..5c84bbd3 --- /dev/null +++ b/process/configuration/assets/output_data_dictionary.csv @@ -0,0 +1,41 @@ +Category,Description,Variable,Scale +Study region information,Continent,Continent,city +Study region information,Country,Country,city +Study region information,2-letter country code,ISO 3166-1 alpha-2,city +Study region information,Study region,City,"city, grid" +Derived study region statistics,"Area (km²; accounting for urban restrictions, if applied)",Area (sqkm),"city, grid" +Derived study region statistics,"Population estimate, as per configured population data source",Population estimate,"city, grid" +Derived study region statistics,Population per km²,Population per sqkm,"city, grid" +Derived study region statistics,Intersection count (following consolidation based on intersection tolerance parameter in region configuration),Intersections,"city, grid" +Derived study region statistics,Intersections per km²,Intersections per sqkm,"city, grid" +Linked covariates,"Total emission of CO 2 from the transport sector, using non-short-cycle-organic fuels in 2015",E_EC2E_T15,city +Linked covariates,"Total emission of CO 2 from the energy sector, using short-cycle-organic fuels in 2015",E_EC2O_T15,city +Linked covariates,Total emission of PM 2.5 from the transport sector in 2015,E_EPM2_T15,city +Linked covariates,Total concertation of PM 2.5 for reference epoch 2014,E_CPM2_T14,city +Analytical statistic,Sample points used in this analysis (generated along pedestrian network for populated grid areas),urban_sample_point_count,"city, grid" +Indicator estimates,Score (/1) for access within 500 m to a fresh food market / supermarket (source: OpenStreetMap or custom),access_500m_fresh_food_market_score,grid +Indicator estimates,Score (/1) for access within 500 m to a convenience store (source: OpenStreetMap or custom),access_500m_convenience_score,grid +Indicator estimates,Score (/1) for access within 500 m to a public transport (source: OpenStreetMap or custom),access_500m_pt_osm_any_score,grid +Indicator estimates,Score (/1) for access within 500 m to a any public open space (source: OpenStreetMap),access_500m_public_open_space_any_score,grid +Indicator estimates,Score (/1) for access within 500 m to a public open space larger than 1.5 hectares (source: OpenStreetMap),access_500m_public_open_space_large_score,grid +Indicator estimates,Score (/1) for access within 500 m to a public transport (source: GTFS),access_500m_pt_gtfs_any_score,grid +Indicator estimates,Score (/1) for access within 500 m to a public transport with average daytime weekday service frequency of 30 minutes or better (source: GTFS),access_500m_pt_gtfs_freq_30_score,grid +Indicator estimates,Score (/1) for access within 500 m to a public transport with average daytime weekday service frequency of 20 minutes or better (source: GTFS),access_500m_pt_gtfs_freq_20_score,grid +Indicator estimates,Score (/1) for access within 500 m to a any public transport stop (source: GTFS or 
OpenStreetMap/custom),access_500m_pt_any_score,grid +Indicator estimates,Percentage of population with access within 500 m to a fresh food market / supermarket (source: OpenStreetMap or custom),pop_pct_access_500m_fresh_food_market_score,city +Indicator estimates,Percentage of population with access within 500 m to a convenience store (source: OpenStreetMap or custom),pop_pct_access_500m_convenience_score,city +Indicator estimates,Percentage of population with access within 500 m to a public transport (source: OpenStreetMap or custom),pop_pct_access_500m_pt_osm_any_score,city +Indicator estimates,Percentage of population with access within 500 m to a any public open space (source: OpenStreetMap),pop_pct_access_500m_public_open_space_any_score,city +Indicator estimates,Percentage of population with access within 500 m to a public open space larger than 1.5 hectares (source: OpenStreetMap),pop_pct_access_500m_public_open_space_large_score,city +Indicator estimates,Percentage of population with access within 500 m to a public transport (source: GTFS),pop_pct_access_500m_pt_gtfs_any_score,city +Indicator estimates,Percentage of population with access within 500 m to a public transport with average daytime weekday service frequency of 30 minutes or better (source: GTFS),pop_pct_access_500m_pt_gtfs_freq_30_score,city +Indicator estimates,Percentage of population with access within 500 m to a public transport with average daytime weekday service frequency of 20 minutes or better (source: GTFS),pop_pct_access_500m_pt_gtfs_freq_20_score,city +Indicator estimates,Percentage of population with access within 500 m to a any public transport stop (source: GTFS or OpenStreetMap/custom),pop_pct_access_500m_pt_any_score,city +Indicator estimates,Average walkable neighbourhood poulation density (population weighted) ,pop_nh_pop_density,city +Indicator estimates,Average walkable neighbourhood intersection density (population weighted) ,pop_nh_intersection_density,city +Indicator estimates,Average daily living score (population weighted),pop_daily_living,city +Indicator estimates,Average walkability (population weighted) ,pop_walkability,city +Indicator estimates,Average walkable neighbourhood poulation density ,local_nh_population_density,"city, grid" +Indicator estimates,Average walkable neighbourhood intersection density ,local_nh_intersection_density,"city, grid" +Indicator estimates,Average daily living score ,local_daily_living,"city, grid" +Indicator estimates,Average walkability ,local_walkability,"city, grid" diff --git a/process/configuration/assets/output_data_dictionary.xlsx b/process/configuration/assets/output_data_dictionary.xlsx new file mode 100644 index 00000000..bafe6f58 Binary files /dev/null and b/process/configuration/assets/output_data_dictionary.xlsx differ diff --git a/process/configuration/templates/config.yml b/process/configuration/templates/config.yml index ec1ac4e3..12fa9bb8 100644 --- a/process/configuration/templates/config.yml +++ b/process/configuration/templates/config.yml @@ -12,7 +12,7 @@ project: # Specifically, this can be useful if you notice that the Docker process has been 'Killed' when running the neighbourhood analysis script. 
multiprocessing: 6 # Number of processors to use in multiprocessing scripts, if implemented - default_codename: example_las_palmas_2023 + default_codename: example_ES_Las_Palmas_2023 # an optional default study region as defined in regions.yml, useful for debugging analysis_timezone: Australia/Melbourne # to have localised time estimates for when processes have commenced and finished, modify this (see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). @@ -41,9 +41,17 @@ network_analysis: # For scaling binary cutoffs using a smooth transition; this parameter adjusts slope k of the transition documentation: authors: Global Healthy Liveable Cities Indicator Study Collaboration - # Authors of project - version: 2.0 - # Version of documentation + # Authors of project (for metadata) + url: + # Your URL (optional; for metadata) + individualname: + # Name of contact person for analysis (for metadata) + positionname: + # Position of contact person conducting analysis (for metadata) + email: + # Contact e-mail (for metadata) + version: 1.0 + # Edition of project (for metadata) points_of_interest: destination_list: [supermarket_osm, bakery_osm, meat_seafood_osm, fruit_veg_osm, deli_osm, convenience_osm, petrolstation_osm, newsagent_osm, market_osm, pt_any] osm_destination_definitions: process/configuration/osm_destination_definitions.csv diff --git a/process/subprocesses/_01_create_study_region.py b/process/subprocesses/_01_create_study_region.py index 62bbbf39..646151cb 100644 --- a/process/subprocesses/_01_create_study_region.py +++ b/process/subprocesses/_01_create_study_region.py @@ -51,14 +51,14 @@ def main(): boundary_data = urban_region['data_dir'] query = f""" -where "{urban_query.split(':')[1]}" """ if '=' not in query: - raise ( + raise Exception( """ The urban area configured for the study region was indicated, however the query wasn't understood (should be in format "GHS:field=value", e.g. "GHS:UC_NM_MN=Baltimore, or (even better; more specific) "GHS:UC_NM_MN='Baltimore' and CTR_MN_NM=='United States'" - """ + """, ) elif '.gpkg:' in area_data: gpkg = area_data.split(':') @@ -67,7 +67,6 @@ def main(): else: boundary_data = area_data query = '' - command = ( ' ogr2ogr -overwrite -progress -f "PostgreSQL" ' f' PG:"host={db_host} port={db_port} dbname={db}' @@ -105,21 +104,41 @@ def main(): with engine.begin() as connection: connection.execute(text(sql)) else: - # get study region bounding box to be used to retrieve intersecting urban geometries - sql = """ - SELECT - ST_Xmin(geom) xmin, - ST_Ymin(geom) ymin, - ST_Xmax(geom) xmax, - ST_Ymax(geom) ymax - FROM "study_region_boundary"; - """ - with engine.begin() as connection: - result = connection.execute(text(sql)) - bbox = ' '.join( - [str(coord) for coord in [coords for coords in result][0]], - ) - + # Global Human Settlements urban area is used to define this study region + if urban_query is not None: + if '=' not in urban_query: + raise Exception( + """ + The urban area configured for the study region was indicated, + however the query wasn't understood + (should be in format "GHS:field=value", + e.g. 
"GHS:UC_NM_MN=Baltimore, or (even better; more specific) + "GHS:UC_NM_MN='Baltimore' and CTR_MN_NM=='United States'" + """, + ) + else: + query = f""" -where "{urban_query.split(':')[1]}" """ + additional_sql = '' + else: + # get study region bounding box to be used to retrieve intersecting urban geometries + sql = """ + SELECT + ST_Xmin(geom) xmin, + ST_Ymin(geom) ymin, + ST_Xmax(geom) xmax, + ST_Ymax(geom) ymax + FROM "study_region_boundary"; + """ + with engine.begin() as connection: + result = connection.execute(text(sql)) + bbox = ' '.join( + [str(coord) for coord in [coords for coords in result][0]], + ) + query = f' -spat {bbox} -spat_srs {crs_srid}' + additional_sql = """ + ,"study_region_boundary" b + WHERE ST_Intersects(ST_Union(a.geom),ST_Union(b.geom)) + """ command = ( ' ogr2ogr -overwrite -progress -f "PostgreSQL" ' f' PG:"host={db_host} port={db_port} dbname={db}' @@ -127,7 +146,7 @@ def main(): f' "{urban_region["data_dir"]}" ' f' -lco geometry_name="geom" -lco precision=NO ' f' -t_srs {crs_srid} -nln full_urban_region ' - f' -spat {bbox} -spat_srs {crs_srid} ' + f' {query} ' ) print(command) sp.call(command, shell=True) @@ -137,10 +156,13 @@ def main(): '{db}'::text AS "db", ST_Area(a.geom)/10^6 AS area_sqkm, a.geom - FROM full_urban_region a, - "study_region_boundary" b - WHERE ST_Intersects(a.geom,b.geom); + FROM full_urban_region a + {additional_sql}; CREATE INDEX IF NOT EXISTS urban_region_gix ON urban_region USING GIST (geom); + """ + with engine.begin() as connection: + connection.execute(text(sql)) + sql = """ CREATE TABLE IF NOT EXISTS urban_study_region AS SELECT b."study_region", b."db", @@ -148,7 +170,7 @@ def main(): ST_Union(ST_Intersection(a.geom,b.geom)) geom FROM "study_region_boundary" a, urban_region b - GROUP BY b."study_region_boundary", b."db"; + GROUP BY b."study_region", b."db"; CREATE INDEX IF NOT EXISTS urban_study_region_gix ON urban_study_region USING GIST (geom); """ with engine.begin() as connection: @@ -190,12 +212,11 @@ def main(): and db_contents.has_table('urban_study_region') and db_contents.has_table(buffered_urban_study_region) ): - return f"""Study region boundaries have previously been created (study_region_boundary, urban_region, urban_study_region and {buffered_urban_study_region}). If you wish to recreate these, please manually drop them (e.g. using psql) or optionally drop the {db} database and start again (e.g. using the subprocesses/_drop_study_region_database.py utility script.\n""" + return """Study region boundaries have been created (study_region_boundary, urban_region, urban_study_region and {buffered_urban_study_region}). If you wish to recreate these, please manually drop them (e.g. using psql) or optionally drop the {db} database and start again (e.g. 
using the subprocesses/_drop_study_region_database.py utility script.\n""" else: raise Exception( """Study region boundary creation failed; check configuration and log files to identify specific issues.""", ) - # output to completion log script_running_log(script, task, start, codename) engine.dispose() diff --git a/process/subprocesses/_03_create_network_resources.py b/process/subprocesses/_03_create_network_resources.py index 5e8e839a..fa2d0601 100644 --- a/process/subprocesses/_03_create_network_resources.py +++ b/process/subprocesses/_03_create_network_resources.py @@ -23,13 +23,92 @@ from tqdm import tqdm -def derive_routable_network( - engine, - network_study_region, - graphml, - osmnx_retain_all, - network_polygon_iteration, +def osmnx_configuration(region_config, network): + """Set up OSMnx for network retrieval and analysis, including check of configuration.""" + ox.settings.use_cache = True + ox.settings.log_console = True + # set OSMnx to retrieve filtered network to match OpenStreetMap publication date + osm_publication_date = f"""[date:"{datetime.strptime(str(region_config['OpenStreetMap']['publication_date']), '%Y%m%d').strftime('%Y-%m-%d')}T00:00:00Z"]""" + ox.settings.overpass_settings = ( + '[out:json][timeout:{timeout}]' + osm_publication_date + '{maxsize}' + ) + if not network['osmnx_retain_all']: + print( + """Note: "osmnx_retain_all = False" ie. only main network segment is retained. Please ensure this is appropriate for your study region (ie. networks on real islands may be excluded).""", + ) + elif network['osmnx_retain_all']: + print( + """Note: "osmnx_retain_all = True" ie. all network segments will be retained. Please ensure this is appropriate for your study region (ie. networks on real islands will be included, however network artifacts resulting in isolated network segments, or network islands, may also exist. These could be problematic if sample points are snapped to erroneous, mal-connected segments. 
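osmnx_configuration() above pins Overpass queries to the configured OpenStreetMap publication date so the retrieved network matches the OSM snapshot used elsewhere in the analysis. Stripped of the project's configuration plumbing, the settings it applies look roughly like this (the date shown is illustrative only):

    from datetime import datetime

    import osmnx as ox

    publication_date = datetime.strptime('20230101', '%Y%m%d').strftime('%Y-%m-%d')  # hypothetical date
    ox.settings.use_cache = True
    ox.settings.log_console = True
    ox.settings.overpass_settings = (
        '[out:json][timeout:{timeout}]'
        f'[date:"{publication_date}T00:00:00Z"]'
        '{maxsize}'
    )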
Check results.).""", + ) + else: + sys.exit( + "Please ensure the osmnx_retain_all has been defined for this region with values of either 'False' or 'True'", + ) + + +def generate_pedestrian_network_nodes_edges( + engine, network, network_study_region, crs, ): + """Generate pedestrian network using OSMnx and store in a PostGIS database, or otherwise retrieve it.""" + db_contents = inspect(engine) + if db_contents.has_table('nodes') and db_contents.has_table('edges'): + print( + f'Network "pedestrian" for {network_study_region} has already been processed.', + ) + print('\nLoading the pedestrian network.') + with engine.connect() as connection: + nodes = gpd.read_postgis('nodes', connection, index_col='osmid') + with engine.connect() as connection: + edges = gpd.read_postgis( + 'edges', connection, index_col=['u', 'v', 'key'], + ) + G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None) + return G_proj + else: + G = derive_pedestrian_network( + engine, + network_study_region, + network['osmnx_retain_all'], + network['polygon_iteration'], + ) + print( + ' - Save edges with geometry to postgis prior to simplification', + ) + graph_to_postgis( + G, engine, 'edges', nodes=False, geometry_name='geom_4326', + ) + print(' - Remove unnecessary key data from edges') + att_list = { + k + for n in G.edges + for k in G.edges[n].keys() + if k not in ['osmid', 'length'] + } + capture_output = [ + [d.pop(att, None) for att in att_list] + for n1, n2, d in tqdm(G.edges(data=True), desc=' ' * 18) + ] + del capture_output + print(' - Project graph') + G_proj = ox.project_graph(G, to_crs=crs['srid']) + if G_proj.is_directed(): + G_proj = G_proj.to_undirected() + print( + ' - Save simplified, projected, undirected graph edges and node GeoDataFrames to PostGIS', + ) + graph_to_postgis( + G_proj, + engine, + nodes_table='nodes', + edges_table='edges_simplified', + ) + return G_proj + + +def derive_pedestrian_network( + engine, network_study_region, osmnx_retain_all, network_polygon_iteration, +): + """Derive routable pedestrian network using OSMnx.""" print( 'Creating and saving pedestrian roads network... 
', end='', flush=True, ) @@ -86,145 +165,83 @@ def derive_routable_network( # induce a subgraph on those nodes G = nx.MultiDiGraph(G.subgraph(nodes)) - ox.save_graphml(G, filepath=graphml, gephi=False) + G = G.to_undirected() print('Done.') return G -def main(): - # simple timer for log file - start = time.time() - script = os.path.basename(sys.argv[0]) - task = 'Create network resources' - - engine = create_engine( - f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', - future=True, - pool_pre_ping=True, - connect_args={ - 'keepalives': 1, - 'keepalives_idle': 30, - 'keepalives_interval': 10, - 'keepalives_count': 5, - }, - ) - db_contents = inspect(engine) - if network['buffered_region']: - network_study_region = buffered_urban_study_region +def graph_to_postgis( + G, + engine, + nodes_table='nodes', + edges_table='edges', + nodes=True, + edges=True, + geometry_name='geom', +): + """Save graph nodes and/or edges to postgis database.""" + if nodes is True and edges is False: + nodes = ox.graph_to_gdfs(G, edges=False) + gdf_to_postgis_format(nodes, engine, nodes_table, geometry_name) + if edges is True and nodes is False: + edges = ox.graph_to_gdfs(G, nodes=False) + gdf_to_postgis_format(edges, engine, edges_table, geometry_name) else: - network_study_region = study_region + nodes, edges = ox.graph_to_gdfs(G) + gdf_to_postgis_format(nodes, engine, nodes_table, geometry_name) + gdf_to_postgis_format(edges, engine, edges_table, geometry_name) - if not ( - db_contents.has_table('edges') - and db_contents.has_table('nodes') - and db_contents.has_table(intersections_table) - ): - print('\nGet networks and save as graphs.') - ox.settings.use_cache = True - ox.settings.log_console = True - # set OSMnx to retrieve filtered network to match OpenStreetMap publication date - osm_publication_date = f"""[date:"{datetime.strptime(str(region_config['OpenStreetMap']['publication_date']), '%Y%m%d').strftime('%Y-%m-%d')}T00:00:00Z"]""" - ox.settings.overpass_settings = ( - '[out:json][timeout:{timeout}]' - + osm_publication_date - + '{maxsize}' + +def gdf_to_postgis_format(gdf, engine, table, geometry_name='geom'): + """Sets geometry with optional new name (e.g. 'geom') and writes to PostGIS, returning the reformatted GeoDataFrame.""" + gdf.columns = [ + geometry_name if x == 'geometry' else x for x in gdf.columns + ] + gdf = gdf.set_geometry(geometry_name) + with engine.connect() as connection: + gdf.to_postgis( + table, connection, index=True, if_exists='replace', ) - if not network['osmnx_retain_all']: - print( - """Note: "osmnx_retain_all = False" ie. only main network segment is retained. Please ensure this is appropriate for your study region (ie. networks on real islands may be excluded).""", - ) - elif network['osmnx_retain_all']: - print( - """Note: "osmnx_retain_all = True" ie. all network segments will be retained. Please ensure this is appropriate for your study region (ie. networks on real islands will be included, however network artifacts resulting in isolated network segments, or network islands, may also exist. These could be problematic if sample points are snapped to erroneous, mal-connected segments. 
Check results.).""", - ) - else: - sys.exit( - "Please ensure the osmnx_retain_all has been defined for this region with values of either 'False' or 'True'", - ) - if os.path.isfile(graphml): - print( - f'Network "pedestrian" for {network_study_region} has already been processed.', - ) - print('\nLoading the pedestrian network.') - G = ox.load_graphml(graphml) - else: - G = derive_routable_network( - engine, - network_study_region, - graphml, - network['osmnx_retain_all'], - network['polygon_iteration'], - ) - if not ( - db_contents.has_table('nodes') and db_contents.has_table('edges') - ): - print( - f'\nPrepare and copy nodes and edges to postgis in project CRS {crs["srid"]}... ', - ) - nodes, edges = ox.graph_to_gdfs(G) - with engine.connect() as connection: - nodes.rename_geometry('geom').to_crs(crs['srid']).to_postgis( - 'nodes', connection, index=True, - ) - edges.rename_geometry('geom').to_crs(crs['srid']).to_postgis( - 'edges', connection, index=True, - ) - if not db_contents.has_table(intersections_table): - ## Copy clean intersections to postgis - print('\nPrepare and copy clean intersections to postgis... ') - # Clean intersections - if not os.path.exists(graphml_proj): - print(' - Remove unnecessary key data from edges') - att_list = { - k - for n in G.edges - for k in G.edges[n].keys() - if k not in ['osmid', 'length'] - } - capture_output = [ - [d.pop(att, None) for att in att_list] - for n1, n2, d in tqdm(G.edges(data=True), desc=' ' * 18) - ] - del capture_output - G_proj = ox.project_graph(G, to_crs=crs['srid']) - if G_proj.is_directed(): - G_proj = G_proj.to_undirected() - ox.save_graphml(G_proj, filepath=graphml_proj, gephi=False) - else: - G_proj = ox.load_graphml(graphml_proj) - intersections = ox.consolidate_intersections( - G_proj, - tolerance=network['intersection_tolerance'], - rebuild_graph=False, - dead_ends=False, +def clean_intersections(engine, G_proj, network, intersections_table): + """Generate cleaned intersections using OSMnx and store in postgis database, or otherwise retrieve them.""" + db_contents = inspect(engine) + if not db_contents.has_table(intersections_table): + ## Copy clean intersections to postgis + print('\nPrepare and copy clean intersections to postgis... 
') + # Clean intersections + intersections = ox.consolidate_intersections( + G_proj, + tolerance=network['intersection_tolerance'], + rebuild_graph=False, + dead_ends=False, + ) + intersections = gpd.GeoDataFrame( + intersections, columns=['geom'], + ).set_geometry('geom') + with engine.connect() as connection: + intersections.to_postgis( + intersections_table, connection, index=True, ) - intersections = gpd.GeoDataFrame( - intersections, columns=['geom'], - ).set_geometry('geom') - with engine.connect() as connection: - intersections.to_postgis( - intersections_table, connection, index=True, - ) - print(' - Done.') + print(' - Done.') - else: - print( - ' - It appears that clean intersection data has already been prepared and imported for this region.', - ) else: print( - '\nIt appears that edges and nodes have already been prepared and imported for this region.', + ' - It appears that clean intersection data has already been prepared and imported for this region.', ) + +def create_pgrouting_network_topology(engine): + """Create network topology for pgrouting for later analysis of node relations for sample points.""" sql = """SELECT 1 WHERE to_regclass('public.edges_target_idx') IS NOT NULL;""" with engine.begin() as connection: res = connection.execute(text(sql)).first() - if res is None: print('\nCreate network topology...') - sql = """ + sql = f""" + ALTER TABLE edges ADD COLUMN IF NOT EXISTS "geom" geometry; + UPDATE edges SET geom = ST_Transform(geom_4326, {crs['srid']}); ALTER TABLE edges ADD COLUMN IF NOT EXISTS "from" bigint; ALTER TABLE edges ADD COLUMN IF NOT EXISTS "to" bigint; UPDATE edges SET "from" = v, "to" = u WHERE key != 2; @@ -236,9 +253,7 @@ def main(): """ with engine.begin() as connection: min_id, max_id = connection.execute(text(sql)).first() - print(f'there are {max_id - min_id + 1} edges to be processed') - interval = 10000 for x in range(min_id, max_id + 1, interval): sql = f"select pgr_createTopology('edges', 1, 'geom', 'ogc_fid', rows_where:='ogc_fid>={x} and ogc_fid<{x+interval}');" @@ -260,6 +275,46 @@ def main(): ' - It appears that the routable pedestrian network has already been set up for use by pgRouting.', ) + +def main(): + # simple timer for log file + start = time.time() + script = os.path.basename(sys.argv[0]) + task = 'Create network resources' + engine = create_engine( + f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', + future=True, + pool_pre_ping=True, + connect_args={ + 'keepalives': 1, + 'keepalives_idle': 30, + 'keepalives_interval': 10, + 'keepalives_count': 5, + }, + ) + db_contents = inspect(engine) + if network['buffered_region']: + network_study_region = buffered_urban_study_region + else: + network_study_region = study_region + + if not ( + db_contents.has_table('edges') + and db_contents.has_table('nodes') + and db_contents.has_table(intersections_table) + ): + osmnx_configuration(region_config, network) + G_proj = generate_pedestrian_network_nodes_edges( + engine, network, network_study_region, crs, + ) + clean_intersections(engine, G_proj, network, intersections_table) + else: + print( + '\nIt appears that edges and nodes have already been prepared and imported for this region.', + ) + + create_pgrouting_network_topology(engine) + # ensure user is granted access to the newly created tables with engine.begin() as connection: connection.execute(text(grant_query)) diff --git a/process/subprocesses/_04_create_population_grid.py b/process/subprocesses/_04_create_population_grid.py index 498e391e..8e7ca5dc 100644 --- 
a/process/subprocesses/_04_create_population_grid.py +++ b/process/subprocesses/_04_create_population_grid.py @@ -12,7 +12,7 @@ # Set up project and region parameters for GHSCIC analyses from _project_setup import * -from _utils import reproject_raster +from geoalchemy2 import Geometry from osgeo import gdal from script_running_log import script_running_log from sqlalchemy import create_engine, inspect, text @@ -23,6 +23,41 @@ gdal.SetConfigOption('CPL_LOG', '/dev/null') # Linux/MacOS +def reproject_raster(inpath, outpath, new_crs): + import rasterio + from rasterio.warp import ( + Resampling, + calculate_default_transform, + reproject, + ) + + dst_crs = new_crs # target CRS for reprojection + with rasterio.open(inpath) as src: + transform, width, height = calculate_default_transform( + src.crs, dst_crs, src.width, src.height, *src.bounds, + ) + kwargs = src.meta.copy() + kwargs.update( + { + 'crs': dst_crs, + 'transform': transform, + 'width': width, + 'height': height, + }, + ) + with rasterio.open(outpath, 'w', **kwargs) as dst: + for i in range(1, src.count + 1): + reproject( + source=rasterio.band(src, i), + destination=rasterio.band(dst, i), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=dst_crs, + resampling=Resampling.nearest, + ) + + def main(): # simple timer for log file start = time.time() diff --git a/process/subprocesses/_07_locate_origins_destinations.py b/process/subprocesses/_07_locate_origins_destinations.py index f2ebc50c..a3d477bf 100644 --- a/process/subprocesses/_07_locate_origins_destinations.py +++ b/process/subprocesses/_07_locate_origins_destinations.py @@ -48,6 +48,13 @@ def main(): CREATE UNIQUE INDEX IF NOT EXISTS {points}_idx ON {points} (point_id); CREATE INDEX IF NOT EXISTS {points}_geom_idx ON {points} USING GIST (geom); """, + 'Only retain point locations with unique geometries (discard duplicates co-located at junction of edges, retaining only single point)': f""" + DELETE FROM {points} a + USING {points} b + WHERE a.point_id > b.point_id + AND st_equals(a.geom, b.geom) + AND a.geom && b.geom; + """, 'Delete any sampling points which were created within the bounds of areas of open space (ie.
along paths through parks)...': f""" DELETE FROM {points} p USING open_space_areas o @@ -66,25 +73,29 @@ def main(): DROP TABLE IF EXISTS sampling_locate_line; CREATE TABLE sampling_locate_line AS SELECT s.point_id, - s.ogc_fid edge_ogc_fid, - s.metres, - "from" n1, - "to" n2, - e.geom AS edge_geom, - ST_LineLocatePoint(e.geom, n1.geom) llp1, - ST_LineLocatePoint(e.geom, s.geom) llpm, - ST_LineLocatePoint(e.geom, n2.geom) llp2, - s.geom + s.ogc_fid edge_ogc_fid, + o.grid_id, + s.metres, + "from" n1, + "to" n2, + e.geom AS edge_geom, + ST_LineLocatePoint(e.geom, n1.geom) llp1, + ST_LineLocatePoint(e.geom, s.geom) llpm, + ST_LineLocatePoint(e.geom, n2.geom) llp2, + s.geom FROM {points} s LEFT JOIN edges e ON s.ogc_fid = e.ogc_fid LEFT JOIN nodes n1 ON e."from" = n1.osmid - LEFT JOIN nodes n2 ON e."to" = n2.osmid; + LEFT JOIN nodes n2 ON e."to" = n2.osmid + LEFT JOIN {population_grid} o + ON ST_Intersects(o.geom,s.geom); -- part 2 (split to save memory on parallel worker query) DROP TABLE IF EXISTS sampling_temp; CREATE TABLE sampling_temp AS SELECT point_id, edge_ogc_fid, + grid_id, metres, n1, n2, diff --git a/process/subprocesses/_09_urban_covariates.py b/process/subprocesses/_09_urban_covariates.py index 8a132c5b..5d393345 100644 --- a/process/subprocesses/_09_urban_covariates.py +++ b/process/subprocesses/_09_urban_covariates.py @@ -69,7 +69,7 @@ def main(): SELECT '{continent}'::text "Continent", '{country}'::text "Country", '{country_code}'::text "ISO 3166-1 alpha-2", - u.study_region "City", + u.study_region, u.area_sqkm "Area (sqkm)", u.pop_est "Population estimate", u.pop_per_sqkm "Population per sqkm", diff --git a/process/subprocesses/_11_export_gpkg.py b/process/subprocesses/_11_export_gpkg.py deleted file mode 100644 index 16777cd4..00000000 --- a/process/subprocesses/_11_export_gpkg.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -Export geopackage. - -Export a geopackage from postgis database with relevant layers for further analysis and mapping. 
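In the sampling_locate_line query added to _07_locate_origins_destinations.py above, ST_LineLocatePoint expresses an edge's start node, the sample point and the end node as fractions (llp1, llpm, llp2) of the way along the edge geometry. The same idea in shapely, for illustration (toy coordinates):

    from shapely.geometry import LineString, Point

    edge = LineString([(0, 0), (100, 0)])
    sample_point = Point(25, 5)
    # 0.25: the sample point projects a quarter of the way along the edge,
    # analogous to ST_LineLocatePoint(e.geom, s.geom) in the SQL above
    fraction_along_edge = edge.project(sample_point, normalized=True)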
-""" - -import os -import subprocess as sp - -import geopandas as gpd - -# Set up project and region parameters for GHSCIC analyses -from _project_setup import * -from geoalchemy2 import Geometry, WKTElement -from script_running_log import script_running_log -from sqlalchemy import create_engine, inspect - - -def main(): - # simple timer for log file - start = time.time() - script = os.path.basename(sys.argv[0]) - task = 'Export geopackage' - - engine = create_engine( - 'postgresql://{user}:{pwd}@{host}/{db}'.format( - user=db_user, pwd=db_pwd, host=db_host, db=db, - ), - ) - - tables = [ - 'aos_public_any_nodes_30m_line', - 'aos_public_large_nodes_30m_line', - 'aos_public_osm', - f'{intersections_table}', - 'dest_type', - 'destinations', - 'edges', - 'nodes', - f'{population_grid}', - 'urban_sample_points', - 'urban_study_region', - 'urban_covariates', - ] - if gtfs_feeds is not None: - tables = tables + [gtfs['headway']] - - print('Copying input resource tables to geopackage...'), - - try: - os.remove(gpkg) - except FileNotFoundError: - pass - - for table in tables: - print(f' - {table}') - command = ( - f'ogr2ogr -update -overwrite -lco overwrite=yes -f GPKG {gpkg} ' - f'PG:"host={db_host} user={db_user} dbname={db} password={db_pwd}" ' - f' {table} ' - ) - sp.call(command, shell=True) - print(' Done.') - - # output to completion log - script_running_log(script, task, start, codename) - engine.dispose() - - -if __name__ == '__main__': - main() diff --git a/process/subprocesses/_12_neighbourhood_analysis.py b/process/subprocesses/_11_neighbourhood_analysis.py similarity index 54% rename from process/subprocesses/_12_neighbourhood_analysis.py rename to process/subprocesses/_11_neighbourhood_analysis.py index 6165962b..fbb7580b 100644 --- a/process/subprocesses/_12_neighbourhood_analysis.py +++ b/process/subprocesses/_11_neighbourhood_analysis.py @@ -1,16 +1,15 @@ """ Neighbourhood analysis. -This script creates neighbourhood indicators for sample points. +This script creates neighbourhood indicators for sample points. To run it, supply a study region code name. + +It assumes network projected network nodes and edges have been generated and stored in a PostGIS database, which can be read from as a GeoDataFrame to generate a graph. -To run it, supply a study region code name. The list of configured codenames is displayed -if run with no region name as an argument. -It is to be run after 01_study_region_setup.py, which collates and processes the required data. Once run, the sample points may be aggregated to a neighbourhood small area grid and for overall city summaries by running 03_aggregation.py. As a result of running the script, a layer is added to the study region's geopackage file -containing sample point indicators ("samplePointsData"). These indicators include: +containing sample point indicators ("sample_points"). These indicators include: 1. average population and intersection density per sample sample point 2. 
accessibility, dailyliving and walkability score per sample point """ @@ -19,7 +18,6 @@ import sys import time -import fiona import geopandas as gpd import networkx as nx import numpy as np @@ -28,6 +26,7 @@ # Set up project and region parameters for GHSCIC analyses from _project_setup import * +from geoalchemy2 import Geometry from setup_sp import ( binary_access_score, cal_dist_node_to_nearest_pois, @@ -36,6 +35,7 @@ filter_ids, spatial_join_index_to_gdf, ) +from sqlalchemy import create_engine, inspect, text from tqdm import tqdm # Hard coded density variable names @@ -45,29 +45,36 @@ def main(): startTime = time.time() + engine = create_engine( + f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', + pool_pre_ping=True, + connect_args={ + 'keepalives': 1, + 'keepalives_idle': 30, + 'keepalives_interval': 10, + 'keepalives_count': 5, + }, + ) + db_contents = inspect(engine) + with engine.connect() as connection: + nodes = gpd.read_postgis('nodes', connection, index_col='osmid') - for file in [gpkg, graphml_proj]: - if not os.path.exists(file): - sys.exit( - f"\n\nSpatial features required for analysis of this city ({file}) weren't able to be located; please confirm that the study region setup scripts have been successfully completed and that this file exists for this study region in the specified path.\n\n", - ) + nodes.columns = ['geometry' if x == 'geom' else x for x in nodes.columns] + nodes = nodes.set_geometry('geometry') - # Check if geopackage has a -wal file associated with it - # if so it is likely open and locked for use by another software package (e.g. QGIS) - # and will be unable to be used - if os.path.exists(f'{gpkg}-wal'): - sys.exit( - f'\nIt appears that the required geopackage {gpkg} may be open in another software package, ' - 'due to the presence of a Write Ahead Logging (WAL) file associated with it. 
Please ensure that the input ' - 'geopackage is not being used in any other software before continuing, and that the file ' - f"'{gpkg}-wal' is not present before continuing.", + with engine.connect() as connection: + edges = gpd.read_postgis( + 'edges_simplified', connection, index_col=['u', 'v', 'key'], ) - input_layers = fiona.listlayers(gpkg) - G_proj = ox.load_graphml(graphml_proj) + edges.columns = ['geometry' if x == 'geom' else x for x in edges.columns] + edges = edges.set_geometry('geometry') - grid = gpd.read_file(gpkg, layer=population_grid) - grid.set_index('grid_id', inplace=True) + G_proj = ox.graph_from_gdfs(nodes, edges, graph_attrs=None).to_undirected() + with engine.connect() as connection: + grid = gpd.read_postgis( + population_grid, connection, index_col='grid_id', + ) print( '\nFirst pass node-level neighbourhood analysis (Calculate average population and intersection density ' @@ -76,49 +83,29 @@ def main(): ) nh_startTime = time.time() # read from disk if exist - if 'nodes_pop_intersect_density' in input_layers: - print(' - Read population and intersection density from local file.') - gdf_nodes_simple = gpd.read_file( - gpkg, layer='nodes_pop_intersect_density', - ) - gdf_nodes_simple.set_index('osmid', inplace=True) + if db_contents.has_table('nodes_pop_intersect_density'): + print(' - Read population and intersection density from database.') + with engine.connect() as connection: + nodes_simple = gpd.read_postgis( + 'nodes_pop_intersect_density', + connection, + index_col='osmid', + geom_col='geometry', + ) else: print(' - Set up simple nodes') - gdf_nodes = ox.graph_to_gdfs(G_proj, nodes=True, edges=False) - # associate nodes with id - gdf_nodes = spatial_join_index_to_gdf(gdf_nodes, grid, dropna=False) + gdf_nodes = spatial_join_index_to_gdf(nodes, grid, dropna=False) # keep only the unique node id column gdf_nodes = gdf_nodes[['grid_id', 'geometry']] # drop any nodes which are na # (they are outside the buffered study region and not of interest) - gdf_nodes_simple = gdf_nodes[~gdf_nodes.grid_id.isna()].copy() + nodes_simple = gdf_nodes[~gdf_nodes.grid_id.isna()].copy() gdf_nodes = gdf_nodes[['grid_id']] - if ( - len( - [ - x - for x in [population_density, intersection_density] - if x not in gdf_nodes_simple.columns - ], - ) - > 0 - ): # Calculate average population and intersection density for each intersection node in study regions # taking mean values from distinct grid cells within neighbourhood buffer distance nh_grid_fields = ['pop_per_sqkm', 'intersections_per_sqkm'] - # Create a dictionary of edge index and integer values of length - # The length attribute was saved as string, so must be recast to use as weight - # The units are meters, so the decimal precision is unnecessary (error is larger than this; meter is adequate) - weight = dict( - zip( - [k for k in G_proj.edges], - [int(float(G_proj.edges[k]['length'])) for k in G_proj.edges], - ), - ) - # Add a new edge attribute using the integer weights - nx.set_edge_attributes(G_proj, weight, 'weight') # run all pairs analysis - total_nodes = len(gdf_nodes_simple) + total_nodes = len(nodes_simple) print( f' - Generate {neighbourhood_distance}m neighbourhoods ' 'for nodes (All pairs Dijkstra shortest path analysis)', @@ -128,7 +115,7 @@ def main(): (k, v.keys()) for k, v in tqdm( nx.all_pairs_dijkstra_path_length( - G_proj, chunk_size, 'weight', + G_proj, neighbourhood_distance, 'length', ), total=total_nodes, unit='nodes', @@ -154,22 +141,23 @@ def main(): .values, ) for index, n in tqdm( - 
np.ndenumerate(gdf_nodes_simple.index.values), + np.ndenumerate(nodes_simple.index.values), total=total_nodes, desc=' ' * 18, ) ], columns=[population_density, intersection_density], - index=gdf_nodes_simple.index.values, + index=nodes_simple.index.values, ) - gdf_nodes_simple = gdf_nodes_simple.join(result) + nodes_simple = nodes_simple.join(result) # save in geopackage (so output files are all kept together) - gdf_nodes_simple.to_file( - gpkg, layer='nodes_pop_intersect_density', driver='GPKG', - ) + with engine.connect() as connection: + nodes_simple.to_postgis( + 'nodes_pop_intersect_density', connection, index='osmid', + ) print( 'Time taken to calculate or load city local neighbourhood statistics: ' - f'{(time.time() - nh_startTime)/60:02g} mins', + f'{(time.time() - nh_startTime)/60:.02f} mins', ) # Calculate accessibility to points of interest and walkability for sample points: # 1. using pandana packadge to calculate distance to access from sample @@ -182,9 +170,8 @@ def main(): # living accessibility, populaiton density and intersections population_density; # sum these three zscores at sample point level print('\nCalculate accessibility to points of interest.') - gdf_nodes, gdf_edges = ox.graph_to_gdfs(G_proj) network = create_pdna_net( - gdf_nodes, gdf_edges, predistance=accessibility_distance, + nodes, edges, predistance=accessibility_distance, ) distance_results = {} print('\nCalculating nearest node analyses ...') @@ -192,8 +179,9 @@ def main(): print(f'\n\t- {analysis_key}') analysis = indicators['nearest_node_analyses'][analysis_key] layer_analysis_count = len(analysis['layers']) + gdf_poi_layers = {} for layer in analysis['layers']: - if layer in input_layers and layer is not None: + if db_contents.has_table(layer) and layer is not None: output_names = analysis['output_names'].copy() if layer_analysis_count > 1 and layer_analysis_count == len( analysis['output_names'], @@ -203,15 +191,18 @@ def main(): output_names[analysis['layers'].index(layer)], ] print(f'\t\t{output_names}') - gdf_poi = gpd.read_file( - analysis['geopackage'].format(gpkg=gpkg), layer=layer, - ) + if layer not in gdf_poi_layers: + with engine.connect() as connection: + gdf_poi_layers[layer] = gpd.read_postgis( + layer, connection, + ) distance_results[ f'{analysis}_{layer}' ] = cal_dist_node_to_nearest_pois( - gdf_poi, - accessibility_distance, - network, + gdf_poi_layers[layer], + geometry='geom', + distance=accessibility_distance, + network=network, category_field=analysis['category_field'], categories=analysis['categories'], filter_field=analysis['filter_field'], @@ -222,65 +213,60 @@ def main(): else: # create null results --- e.g. 
for GTFS analyses where no layer exists distance_results[f'{analysis_key}_{layer}'] = pd.DataFrame( - index=gdf_nodes.index, + index=nodes.index, columns=[ f'sp_nearest_node_{x}' for x in analysis['output_names'] ], ) # concatenate analysis dataframes into one - gdf_nodes_poi_dist = pd.concat( - [gdf_nodes] + [distance_results[x] for x in distance_results], axis=1, + nodes_poi_dist = pd.concat( + [nodes] + [distance_results[x] for x in distance_results], axis=1, ) - unnecessary_columns = [ - x - for x in [ - 'geometry', - 'grid_id', - 'lat', - 'lon', - 'y', - 'x', - 'highway', - 'ref', + nodes_poi_dist = nodes_poi_dist[ + [ + x + for x in nodes_poi_dist.columns + if x + not in [ + 'y', + 'x', + 'street_count', + 'lon', + 'lat', + 'ref', + 'highway', + 'geometry', + ] ] - if x in gdf_nodes_poi_dist.columns ] - gdf_nodes_poi_dist.drop( - unnecessary_columns, axis=1, inplace=True, errors='ignore', - ) # replace -999 values (meaning no destination reached in less than 500 metres) as nan - gdf_nodes_poi_dist = ( - round(gdf_nodes_poi_dist, 0).replace(-999, np.nan).astype('Int64') + nodes_poi_dist = ( + round(nodes_poi_dist, 0).replace(-999, np.nan).astype('Int64') ) # read sample points from disk (in city-specific geopackage) - samplePointsData = gpd.read_file(gpkg, layer='urban_sample_points') - # create 'grid_id' for sample point, if it not exists - if 'grid_id' not in samplePointsData.columns: - samplePointsData = spatial_join_index_to_gdf(samplePointsData, grid) - samplePointsData = filter_ids( - df=samplePointsData, - query=f"""grid_id not in {list(grid.query(f'pop_est < {population["pop_min_threshold"]}').index.values)}""", - message='Restrict sample points to those not located in grids with a population below ' - f"the minimum threshold value ({population['pop_min_threshold']})...", - ) - samplePointsData = filter_ids( - df=samplePointsData, - query=f"""n1 in {list(gdf_nodes_simple.index.values)} and n2 in {list(gdf_nodes_simple.index.values)}""", + with engine.connect() as connection: + sample_points = gpd.read_postgis('urban_sample_points', connection) + sample_points.columns = [ + 'geometry' if x == 'geom' else x for x in sample_points.columns + ] + sample_points = filter_ids( + df=sample_points, + query=f"""n1 in {list(nodes_simple.index.values)} and n2 in {list(nodes_simple.index.values)}""", message='Restrict sample points to those with two associated sample nodes...', ) - samplePointsData.set_index('point_id', inplace=True) - distance_names = list(gdf_nodes_poi_dist.columns) + sample_points.set_index('point_id', inplace=True) + distance_names = list(nodes_poi_dist.columns) # Estimate full distance to destinations for sample points full_nodes = create_full_nodes( - samplePointsData, - gdf_nodes_simple, - gdf_nodes_poi_dist, + sample_points, + nodes_simple, + nodes_poi_dist, distance_names, population_density, intersection_density, ) - samplePointsData = samplePointsData[ + sample_points = sample_points[ ['grid_id', 'edge_ogc_fid', 'geometry'] ].join(full_nodes, how='left') # create binary access scores evaluated against accessibility distance @@ -288,8 +274,8 @@ def main(): access_score_names = [ f"{x.replace('nearest_node','access')}_score" for x in distance_names ] - samplePointsData[access_score_names] = binary_access_score( - samplePointsData, distance_names, accessibility_distance, + sample_points[access_score_names] = binary_access_score( + sample_points, distance_names, accessibility_distance, ) print('Calculating sample point specific analyses ...') # Defined in generated 
config file, e.g. daily living score, walkability index, etc @@ -304,33 +290,32 @@ def main(): ] axis = indicators['sample_point_analyses'][analysis][var]['axis'] if formula == 'sum': - samplePointsData[var] = samplePointsData[columns].sum( - axis=axis, - ) + sample_points[var] = sample_points[columns].sum(axis=axis) if formula == 'max': - samplePointsData[var] = samplePointsData[columns].max( - axis=axis, - ) + sample_points[var] = sample_points[columns].max(axis=axis) if formula == 'sum_of_z_scores': - samplePointsData[var] = ( - ( - samplePointsData[columns] - - samplePointsData[columns].mean() - ) - / samplePointsData[columns].std() + sample_points[var] = ( + (sample_points[columns] - sample_points[columns].mean()) + / sample_points[columns].std() ).sum(axis=1) # grid_id and edge_ogc_fid are integers - samplePointsData[samplePointsData.columns[0:2]] = samplePointsData[ - samplePointsData.columns[0:2] + sample_points[sample_points.columns[0:2]] = sample_points[ + sample_points.columns[0:2] ].astype(int) # remaining non-geometry fields are float - samplePointsData[samplePointsData.columns[3:]] = samplePointsData[ - samplePointsData.columns[3:] + sample_points[sample_points.columns[3:]] = sample_points[ + sample_points.columns[3:] ].astype(float) - print('Save to geopackage...') - # save the sample points with all the desired results to a new layer in geopackage - samplePointsData = samplePointsData.reset_index() - samplePointsData.to_file(gpkg, layer='samplePointsData', driver='GPKG') + print('Save to database...') + # save the sample points with all the desired results to a new layer in the database + sample_points.columns = [ + 'geom' if x == 'geometry' else x for x in sample_points.columns + ] + sample_points = sample_points.set_geometry('geom') + with engine.connect() as connection: + sample_points.to_postgis( + point_summary, connection, index=True, if_exists='replace', + ) endTime = time.time() - startTime print(f'Total time is : {endTime / 60:.2f} minutes') diff --git a/process/subprocesses/_13_aggregation.py b/process/subprocesses/_12_aggregation.py similarity index 78% rename from process/subprocesses/_13_aggregation.py rename to process/subprocesses/_12_aggregation.py index d498c38a..95bb51f6 100644 --- a/process/subprocesses/_13_aggregation.py +++ b/process/subprocesses/_12_aggregation.py @@ -17,21 +17,32 @@ # Set up project and region parameters for GHSCIC analyses from _project_setup import * +from geoalchemy2 import Geometry from setup_aggr import ( calc_cities_pop_pct_indicators, calc_grid_pct_sp_indicators, ) +from sqlalchemy import create_engine, inspect, text def main(): startTime = time.time() - + engine = create_engine( + f'postgresql://{db_user}:{db_pwd}@{db_host}/{db}', + pool_pre_ping=True, + connect_args={ + 'keepalives': 1, + 'keepalives_idle': 30, + 'keepalives_interval': 10, + 'keepalives_count': 5, + }, + ) print('Calculating small area neighbourhood grid indicators... '), # calculate within-city indicators weighted by sample points for each city # calc_grid_pct_sp_indicators take sample point stats within each city as # input and aggregate up to grid cell indicators by calculating the mean of # sample points stats within each hex - calc_grid_pct_sp_indicators(region_config, indicators) + calc_grid_pct_sp_indicators(engine, region_config, indicators) print('Done.') print('Calculating city summary indicators... 
'),
@@ -43,7 +54,7 @@ def main():
     # in addition to the population weighted averages, unweighted averages are
     # also included to reflect the spatial distribution of key walkability
     # measures (regardless of population distribution)
-    calc_cities_pop_pct_indicators(region_config, indicators)
+    calc_cities_pop_pct_indicators(engine, region_config, indicators)
     print('Done.')
     print(
         f'\nAggregation completed: {(time.time() - startTime)/60.0:.02f} mins',
diff --git a/process/subprocesses/_project_setup.py b/process/subprocesses/_project_setup.py
index ae080239..15d28249 100644
--- a/process/subprocesses/_project_setup.py
+++ b/process/subprocesses/_project_setup.py
@@ -86,17 +86,21 @@ def region_data_setup(
     try:
         if data not in datasets or datasets[data] is None:
             raise SystemExit(
-                f'An entry for at least one {data} dataset does not appear to have been defined in datasets.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml with region configuration in {region}.yml. Please update datasets.yml to proceed.',
+                f'\nAn entry for at least one {data} dataset does not appear to have been defined in datasets.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml with region configuration in {region}.yml. Please update datasets.yml to proceed.\n',
             )
         elif region_config[data] is None:
             raise SystemExit(
-                f'The entry for {data} does not appear to have been defined in {region}.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml. Please update {region}.yml to proceed.',
+                f'\nThe entry for {data} does not appear to have been defined in {region}.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml. Please update {region}.yml to proceed.\n',
+            )
+        elif datasets[data][region_config[data]] is None:
+            raise SystemExit(
+                f'\nThe configured entry for {region_config[data]} under {data} within datasets.yml does not appear to be associated with any values. Please check and amend the specification for this entry within datasets.yml, or the configuration within {region}.yml to proceed. (Is this entry and its records indented as per the provided example?)\n',
             )
         else:
             if 'citation' not in datasets[data][region_config[data]]:
                 if data != 'OpenStreetMap':
                     raise SystemExit(
-                        f'No citation record has been configured for the {data} dataset configured for this region. Please add this to its record in datasets.yml (see template datasets.yml for examples).',
+                        f'\nNo citation record has been configured for the {data} dataset configured for this region. Please add this to its record in datasets.yml (see template datasets.yml for examples).\n',
                     )
                 elif 'source' not in region_config['OpenStreetMap']:
                     datasets[data][region_config[data]][
@@ -118,7 +122,7 @@
                'data_dir'
            ] = f"{data_path}/{datasets[data][region_config[data]]['data_dir']}"
        return data_dictionary
-    except Exception:
+    except Exception as e:
         raise e
@@ -158,7 +162,7 @@ def region_dictionary_setup(region, region_config, config, folder_path):
         region, region_config, 'urban_region', data_path,
     )
     r['buffered_urban_study_region'] = buffered_urban_study_region
-    r['db'] = f'li_{region}_{r["year"]}'.lower()
+    r['db'] = region.lower()
     r['dbComment'] = f'Liveability indicator data for {region} {r["year"]}.'
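As a minimal illustrative sketch, the mapping below shows the kind of parsed datasets.yml structure that the checks in region_data_setup above assume; the dataset type 'population' is taken from the configuration keys used in this file, while the codename 'example_population_grid' and its values are hypothetical placeholders.

# Hypothetical parsed configuration, illustrating the three failure modes guarded against above:
# a dataset type missing from datasets.yml, a blank entry in the region's .yml file, and a
# referenced record with no indented values beneath it.
datasets = {
    'population': {
        'example_population_grid': {
            'citation': 'Example population data provider (2023)',
            'data_dir': 'population_grids/example_population_grid',
        },
    },
}
region_config = {'population': 'example_population_grid'}
# If 'example_population_grid' were listed in datasets.yml without indented records beneath it,
# it would parse as None and the new SystemExit message above would be raised.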
r['population'] = region_data_setup( region, region_config, 'population', data_path, @@ -183,8 +187,9 @@ def region_dictionary_setup(region, region_config, config, folder_path): r[ 'gpkg' ] = f'{r["region_dir"]}/{r["study_region"]}_{study_buffer}m_buffer.gpkg' - r['grid_summary'] = f'{r["study_region"]}_grid_{resolution}_{date}' - r['city_summary'] = f'{r["study_region"]}_city_{date}' + r['point_summary'] = f'{r["study_region"]}_sample_points' + r['grid_summary'] = f'{r["study_region"]}_grid_{resolution}' + r['city_summary'] = f'{r["study_region"]}_region' if 'policy_review' in r: r['policy_review'] = f"{folder_path}/{r['policy_review']}" else: @@ -394,11 +399,7 @@ def main(): main() else: print(f'\n{authors}, version {version}') - if any( - ['_generate_reports.py' in f.filename for f in inspect.stack()[1:]], - ): - print('\nGenerate reports\n') - print(f'\nOutput directory: {region_dir.replace(folder_path,"")}\n\n') - else: - print(f'\nProcessing: {name} ({codename}{is_default_codename})\n\n') - print(f'\nOutput directory: {region_dir}\n\n') + print(f'\n{name} ({codename}{is_default_codename})') + print( + f"\nOutput directory:\n {region_dir.replace('/home/ghsci/work/','')}\n", + ) diff --git a/process/subprocesses/_report_functions.py b/process/subprocesses/_report_functions.py deleted file mode 100644 index eb65be29..00000000 --- a/process/subprocesses/_report_functions.py +++ /dev/null @@ -1,1211 +0,0 @@ -""" -Report functions. - -Define functions used for formatting and saving indicator reports. -""" -import json -import os -import time -from textwrap import wrap - -import fiona -import geopandas as gpd -import matplotlib as mpl -import matplotlib.font_manager as fm -import matplotlib.pyplot as plt -import matplotlib.ticker as ticker -import numpy as np -import pandas as pd -from babel.numbers import format_decimal as fnum -from babel.units import format_unit -from fpdf import FPDF, FlexTemplate -from matplotlib.cm import ScalarMappable -from matplotlib.lines import Line2D -from mpl_toolkits.axes_grid1 import make_axes_locatable -from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar -from mpl_toolkits.axes_grid1.inset_locator import inset_axes -from subprocesses.batlow import batlow_map as cmap - - -def get_and_setup_language_cities(config): - """Setup and return languages for given configuration.""" - if config.auto_language: - languages = pd.read_excel(config.configuration, sheet_name='languages') - languages = languages[languages['name'] == config.city].dropna( - axis=1, how='all', - ) - languages = list(languages.columns[2:]) - else: - languages = [config.language] - return languages - - -def generate_report_for_language( - config, language, indicators, policies, -): - """Generate report for a processed city in a given language.""" - city = config.city - font = get_and_setup_font(language, config) - # set up policies - city_policy = policy_data_setup(policies, config.region['policy_review']) - # get city and grid summary data - gpkg = config.region['gpkg'] - if not os.path.isfile(gpkg): - raise Exception( - f'\n\nRequired file {gpkg} could not be located. To proceed with report generation, please ensure that analysis for this region, including export of geopackage, has been completed successfully.\n\n', - ) - layers = fiona.listlayers(gpkg) - if not any([x.startswith(city) for x in layers]): - raise Exception( - f'\n\nResult summary layers for grid and city could not be located in the geopackage {gpkg}. 
To proceed with report generation, please ensure that analysis for this region has been completed successfully.\n\n', - ) - gdfs = {} - for gdf in ['city', 'grid']: - gdfs[gdf] = gpd.read_file( - gpkg, - layer=[ - layer - for layer in layers - if layer.startswith( - config.region[f'{gdf}_summary'].strip( - time.strftime('%Y-%m-%d'), - ), - ) - ][0], - ) - # The below currently relates walkability to specified reference - # (e.g. the GHSCIC 25 city median, following standardisation using - # 25-city mean and standard deviation for sub-indicators) - gdfs['grid'] = evaluate_comparative_walkability( - gdfs['grid'], indicators['report']['walkability']['ghscic_reference'], - ) - indicators['report']['walkability'][ - 'walkability_above_median_pct' - ] = evaluate_threshold_pct( - gdfs['grid'], - 'all_cities_walkability', - '>', - indicators['report']['walkability']['ghscic_walkability_reference'], - ) - for i in indicators['report']['thresholds']: - indicators['report']['thresholds'][i]['pct'] = evaluate_threshold_pct( - gdfs['grid'], - indicators['report']['thresholds'][i]['field'], - indicators['report']['thresholds'][i]['relationship'], - indicators['report']['thresholds'][i]['criteria'], - ) - # set up phrases - phrases = prepare_phrases(config, city, language) - # Generate resources - if config.generate_resources: - capture_return = generate_resources( - config, - gdfs['city'], - gdfs['grid'], - phrases, - indicators, - city_policy, - language, - cmap, - ) - # instantiate template - for template in config.templates: - print(f' [{template}]') - capture_return = generate_scorecard( - config, phrases, indicators, city_policy, language, template, font, - ) - print(capture_return) - - -def get_and_setup_font(language, config): - """Setup and return font for given language configuration.""" - fonts = pd.read_excel(config.configuration, sheet_name='fonts') - if language.replace(' (Auto-translation)', '') in fonts.Language.unique(): - fonts = fonts.loc[ - fonts['Language'] == language.replace(' (Auto-translation)', '') - ].fillna('') - else: - fonts = fonts.loc[fonts['Language'] == 'default'].fillna('') - main_font = fonts.File.values[0].strip() - fm.fontManager.addfont(main_font) - prop = fm.FontProperties(fname=main_font) - fm.findfont(prop=prop, directory=main_font, rebuild_if_missing=True) - plt.rcParams['font.family'] = prop.get_name() - font = fonts.Font.values[0] - return font - - -def policy_data_setup(policies, policy_review): - """Returns a dictionary of policy data.""" - review = pd.read_excel(policy_review, index_col=0) - df_policy = {} - # Presence score - df_policy['Presence_rating'] = review.loc['Score']['Policy identified'] - # Quality score - df_policy['Checklist_rating'] = review.loc['Score']['Quality'] - # Presence - df_policy['Presence'] = review.loc[ - [p['Policy'] for p in policies if p['Display'] == 'Presence'] - ].apply(lambda x: x['Weight'] * x['Policy identified'], axis=1) - # GDP - df_policy['Presence_gdp'] = pd.DataFrame( - [ - { - c: p[c] - for c in p - if c - in ['Label', 'gdp_comparison_middle', 'gdp_comparison_upper'] - } - for p in policies - if p['Display'] == 'Presence' - ], - ) - df_policy['Presence_gdp'].columns = ['Policy', 'middle', 'upper'] - df_policy['Presence_gdp'].set_index('Policy', inplace=True) - # Urban Checklist - df_policy['Checklist'] = review.loc[ - [p['Policy'] for p in policies if p['Display'] == 'Checklist'] - ]['Checklist'] - # Public open space checklist - df_policy['POS'] = review.loc[ - [p['Policy'] for p in policies if p['Display'] == 
'POS'] - ]['Checklist'] - # Public transport checklist - df_policy['PT'] = review.loc[ - [p['Policy'] for p in policies if p['Display'] == 'PT'] - ]['Checklist'] - return df_policy - - -def evaluate_comparative_walkability(gdf_grid, reference): - """Evaluate walkability relative to 25-city study reference.""" - for x in reference: - gdf_grid[f'z_{x}'] = (gdf_grid[x] - reference[x]['mean']) / reference[ - x - ]['sd'] - gdf_grid['all_cities_walkability'] = sum( - [gdf_grid[f'z_{x}'] for x in reference], - ) - return gdf_grid - - -def evaluate_threshold_pct( - gdf_grid, indicator, relationship, reference, population='pop_est', -): - """Evaluate whether a pandas series meets a threshold criteria (eg. '<' or '>'.""" - percentage = round( - 100 - * gdf_grid.query(f'{indicator} {relationship} {reference}')[ - population - ].sum() - / gdf_grid[population].sum(), - 1, - ) - return percentage - - -def generate_resources( - config, - gdf_city, - gdf_grid, - phrases, - indicators, - city_policy, - language, - cmap, -): - """ - The function prepares a series of image resources required for the global indicator score cards. - - The city_path string variable is returned, where generated resources will be stored upon successful execution. - """ - figure_path = f'{config.region["region_dir"]}/figures' - locale = phrases['locale'] - city_stats = compile_city_stats(gdf_city, indicators, phrases) - if not os.path.exists(figure_path): - os.mkdir(figure_path) - # Spatial access liveability profile - li_profile( - city_stats=city_stats, - title=phrases['Population % with access within 500m to...'], - cmap=cmap, - phrases=phrases, - path=f'{figure_path}/access_profile_{language}.jpg', - ) - ## constrain extreme outlying walkability for representation - gdf_grid['all_cities_walkability'] = gdf_grid[ - 'all_cities_walkability' - ].apply(lambda x: -6 if x < -6 else (6 if x > 6 else x)) - # Spatial distribution maps - spatial_maps = compile_spatial_map_info( - indicators['report']['spatial_distribution_figures'], - gdf_city, - phrases, - locale, - language=language, - ) - for f in spatial_maps: - spatial_dist_map( - gdf_grid, - column=f, - range=spatial_maps[f]['range'], - label=spatial_maps[f]['label'], - tick_labels=spatial_maps[f]['tick_labels'], - cmap=cmap, - path=f'{figure_path}/{spatial_maps[f]["outfile"]}', - phrases=phrases, - locale=locale, - ) - # Threshold maps - for scenario in indicators['report']['thresholds']: - threshold_map( - gdf_grid, - column=indicators['report']['thresholds'][scenario]['field'], - scale=indicators['report']['thresholds'][scenario]['scale'], - comparison=indicators['report']['thresholds'][scenario][ - 'criteria' - ], - label=( - f"{phrases[indicators['report']['thresholds'][scenario]['title']]} ({phrases['density_units']})" - ), - cmap=cmap, - path=f"{figure_path}/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg", - phrases=phrases, - locale=locale, - ) - # Policy ratings - policy_rating( - range=[0, 24], - score=city_policy['Presence_rating'], - comparison=indicators['report']['policy']['comparisons']['presence'], - label='', - comparison_label=phrases['25 city comparison'], - cmap=cmap, - locale=locale, - path=f'{figure_path}/policy_presence_rating_{language}.jpg', - ) - policy_rating( - range=[0, 57], - score=city_policy['Checklist_rating'], - comparison=indicators['report']['policy']['comparisons']['quality'], - label='', - comparison_label=phrases['25 city comparison'], - cmap=cmap, - locale=locale, - 
path=f'{figure_path}/policy_checklist_rating_{language}.jpg', - ) - return figure_path - - -def fpdf2_mm_scale(mm): - """Returns a width double that of the conversion of mm to inches. - - This has been found, via trial and error, to be useful when preparing images for display in generated PDFs using fpdf2. - """ - return 2 * mm / 25.4 - - -def _pct(value, locale, length='short'): - """Formats a percentage sign according to a given locale.""" - return format_unit(value, 'percent', locale=locale, length=length) - - -def compile_city_stats(gdf_city, indicators, phrases): - """Compile a set of city statistics with comparisons, given a processed geodataframe of city summary statistics and a dictionary of indicators including reference percentiles.""" - city_stats = {} - city_stats['access'] = gdf_city[ - indicators['report']['accessibility'].keys() - ].transpose()[0] - city_stats['access'].index = [ - indicators['report']['accessibility'][x]['title'] - if city_stats['access'][x] is not None - else f"{indicators['report']['accessibility'][x]['title']} (not evaluated)" - for x in city_stats['access'].index - ] - city_stats['access'] = city_stats['access'].fillna( - 0, - ) # for display purposes - city_stats['comparisons'] = { - indicators['report']['accessibility'][x]['title']: indicators[ - 'report' - ]['accessibility'][x]['ghscic_reference'] - for x in indicators['report']['accessibility'] - } - city_stats['percentiles'] = {} - for percentile in ['p25', 'p50', 'p75']: - city_stats['percentiles'][percentile] = [ - city_stats['comparisons'][x][percentile] - for x in city_stats['comparisons'].keys() - ] - city_stats['access'].index = [ - phrases[x] for x in city_stats['access'].index - ] - return city_stats - - -def compile_spatial_map_info( - spatial_distribution_figures, gdf_city, phrases, locale, language, -): - """ - Compile required information to produce spatial distribution figures. - - This is done using the information recorded in configuration/indicators.yml; specifically, indicators['report']['spatial_distribution_figures'] - """ - # effectively deep copy the supplied dictionary so its not mutable - spatial_maps = json.loads(json.dumps(spatial_distribution_figures)) - for i in spatial_maps: - for text in ['label', 'outfile']: - spatial_maps[i][text] = spatial_maps[i][text].format(**locals()) - if spatial_maps[i]['tick_labels'] is not None: - spatial_maps[i]['tick_labels'] = [ - x.format(**{'phrases': phrases}) - for x in spatial_maps[i]['tick_labels'] - ] - if i.startswith('pct_'): - city_summary_percent = _pct( - fnum(gdf_city[f'pop_{i}'].fillna(0)[0], '0.0', locale), locale, - ) - spatial_maps[i][ - 'label' - ] = f'{spatial_maps[i]["label"]} ({city_summary_percent})' - if gdf_city['pop_pct_access_500m_pt_gtfs_freq_20_score'][ - 0 - ] is None or pd.isna( - gdf_city['pop_pct_access_500m_pt_gtfs_freq_20_score'][0], - ): - spatial_maps['pct_access_500m_pt_any_score'] = spatial_maps.pop( - 'pct_access_500m_pt_gtfs_freq_20_score', - ) - spatial_maps['pct_access_500m_pt_any_score']['label'] = ( - f'{phrases["Percentage of population with access to public transport"]}\n' - f'({_pct(fnum(gdf_city["pop_pct_access_500m_pt_any_score"][0],"0.0",locale),locale)})' - ) - return spatial_maps - - -def add_scalebar( - ax, - length, - multiplier, - units, - fontproperties, - loc='upper left', - pad=0, - color='black', - frameon=False, - size_vertical=2, - locale='en', -): - """ - Adds a scalebar to matplotlib map. 
- - Requires import of: from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar - As a rule of thumb, a scalebar of 1/3 of feature size seems appropriate. - For example, to achieve this, calculate the variable 'length' as - - gdf_width = gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0] - scalebar_length = int(gdf_width / (3000)) - """ - scalebar = AnchoredSizeBar( - ax.transData, - length * multiplier, - format_unit(length, units, locale=locale, length='short'), - loc=loc, - pad=pad, - color=color, - frameon=frameon, - size_vertical=size_vertical, - fontproperties=fontproperties, - ) - ax.add_artist(scalebar) - - -def add_localised_north_arrow( - ax, - text='N', - xy=(1, 0.96), - textsize=14, - arrowprops=dict(facecolor='black', width=4, headwidth=8), -): - """ - Add a minimal north arrow with custom text label above it to a matplotlib map. - - This can be used to add, for example, 'N' or other language equivalent. Default placement is in upper right corner of map. - """ - arrow = ax.annotate( - '', - xy=(1, 0.96), - xycoords=ax.transAxes, - xytext=(0, -0.5), - textcoords='offset pixels', - va='center', - ha='center', - arrowprops=arrowprops, - ) - ax.annotate( - text, - xy=(0.5, 1.5), - xycoords=arrow, - va='center', - ha='center', - fontsize=textsize, - ) - - -## radar chart -def li_profile( - city_stats, - title, - cmap, - path, - phrases, - width=fpdf2_mm_scale(80), - height=fpdf2_mm_scale(80), - dpi=300, -): - """ - Generates a radar chart for city liveability profiles. - - Expands on https://www.python-graph-gallery.com/web-circular-barplot-with-matplotlib - -- A python code blog post by Yan Holtz, in turn expanding on work of Tomás Capretto and Tobias Stadler. - """ - figsize = (width, height) - # Values for the x axis - ANGLES = np.linspace( - 0.15, 2 * np.pi - 0.05, len(city_stats['access']), endpoint=False, - ) - VALUES = city_stats['access'].values - COMPARISON = city_stats['percentiles']['p50'] - INDICATORS = city_stats['access'].index - # Colours - GREY12 = '#1f1f1f' - norm = mpl.colors.Normalize(vmin=0, vmax=100) - COLORS = cmap(list(norm(VALUES))) - # Initialize layout in polar coordinates - textsize = 11 - fig, ax = plt.subplots(figsize=figsize, subplot_kw={'projection': 'polar'}) - # Set background color to white, both axis and figure. 
- fig.patch.set_facecolor('white') - ax.set_facecolor('white') - ax.set_theta_offset(1.2 * np.pi / 2) - ax.set_ylim(-50, 125) - # Add geometries to the plot ------------------------------------- - # Add bars to represent the cumulative track lengths - ax.bar(ANGLES, VALUES, color=COLORS, alpha=0.9, width=0.52, zorder=10) - # Add interquartile comparison reference lines - ax.vlines( - ANGLES, - city_stats['percentiles']['p25'], - city_stats['percentiles']['p75'], - color=GREY12, - zorder=11, - ) - # Add dots to represent the mean gain - comparison_text = '\n'.join( - wrap(phrases['25 city comparison'], 17, break_long_words=False), - ) - ax.scatter( - ANGLES, - COMPARISON, - s=60, - color=GREY12, - zorder=11, - label=comparison_text, - ) - # Add labels for the indicators - try: - LABELS = [ - '\n'.join(wrap(r, 12, break_long_words=False)) for r in INDICATORS - ] - except Exception: - LABELS = INDICATORS - # Set the labels - ax.set_xticks(ANGLES) - ax.set_xticklabels(LABELS, size=textsize) - # Remove lines for polar axis (x) - ax.xaxis.grid(False) - # Put grid lines for radial axis (y) at 0, 1000, 2000, and 3000 - ax.set_yticklabels([]) - ax.set_yticks([0, 25, 50, 75, 100]) - # Remove spines - ax.spines['start'].set_color('none') - ax.spines['polar'].set_color('none') - # Adjust padding of the x axis labels ---------------------------- - # This is going to add extra space around the labels for the - # ticks of the x axis. - XTICKS = ax.xaxis.get_major_ticks() - for tick in XTICKS: - tick.set_pad(10) - # Add custom annotations ----------------------------------------- - # The following represent the heights in the values of the y axis - PAD = 0 - for num in [0, 50, 100]: - ax.text( - -0.2 * np.pi / 2, - num + PAD, - f'{num}%', - ha='center', - va='center', - backgroundcolor='white', - size=textsize, - ) - # Add text to explain the meaning of the height of the bar and the - # height of the dot - ax.text( - ANGLES[0], - -50, - '\n'.join(wrap(title, 13, break_long_words=False)), - rotation=0, - ha='center', - va='center', - size=textsize, - zorder=12, - ) - angle = np.deg2rad(130) - ax.legend( - loc='lower right', - bbox_to_anchor=(0.58 + np.cos(angle) / 2, 0.46 + np.sin(angle) / 2), - ) - fig.savefig(path, dpi=dpi) - plt.close(fig) - - -## Spatial distribution mapping -def spatial_dist_map( - gdf, - column, - range, - label, - tick_labels, - cmap, - path, - width=fpdf2_mm_scale(88), - height=fpdf2_mm_scale(80), - dpi=300, - phrases={'north arrow': 'N', 'km': 'km'}, - locale='en', -): - """Spatial distribution maps using geopandas geodataframe.""" - figsize = (width, height) - textsize = 14 - fig, ax = plt.subplots(figsize=figsize) - ax.set_axis_off() - divider = make_axes_locatable(ax) # Define 'divider' for the axes - # Legend axes will be located at the 'bottom' of figure, with width '5%' of ax and - # a padding between them equal to '0.1' inches - cax = divider.append_axes('bottom', size='5%', pad=0.1) - gdf.plot( - column=column, - ax=ax, - legend=True, - vmin=range[0], - vmax=range[1], - legend_kwds={ - 'label': '\n'.join(wrap(label, 60, break_long_words=False)) - if label.find('\n') < 0 - else label, - 'orientation': 'horizontal', - }, - cax=cax, - cmap=cmap, - ) - # scalebar - add_scalebar( - ax, - length=int( - (gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0]) - / (3000), - ), - multiplier=1000, - units='kilometer', - locale=locale, - fontproperties=fm.FontProperties(size=textsize), - ) - # north arrow - add_localised_north_arrow(ax, text=phrases['north arrow']) - # axis 
formatting - cax.tick_params(labelsize=textsize) - cax.xaxis.label.set_size(textsize) - if tick_labels is not None: - # cax.set_xticks(cax.get_xticks().tolist()) - # cax.set_xticklabels(tick_labels) - cax.xaxis.set_major_locator(ticker.MaxNLocator(len(tick_labels))) - ticks_loc = cax.get_xticks().tolist() - cax.xaxis.set_major_locator(ticker.FixedLocator(ticks_loc)) - cax.set_xticklabels(tick_labels) - plt.tight_layout() - fig.savefig(path, dpi=dpi) - plt.close(fig) - - -def threshold_map( - gdf, - column, - comparison, - scale, - label, - cmap, - path, - width=fpdf2_mm_scale(88), - height=fpdf2_mm_scale(80), - dpi=300, - phrases={'north arrow': 'N', 'km': 'km'}, - locale='en', -): - """Create threshold indicator map.""" - figsize = (width, height) - textsize = 14 - fig, ax = plt.subplots(figsize=figsize) - ax.set_axis_off() - divider = make_axes_locatable(ax) # Define 'divider' for the axes - # Legend axes will be located at the 'bottom' of figure, with width '5%' of ax and - # a padding between them equal to '0.1' inches - cax = divider.append_axes('bottom', size='5%', pad=0.1) - gdf.plot( - column=column, - ax=ax, - legend=True, - legend_kwds={ - 'label': '\n'.join(wrap(label, 60, break_long_words=False)) - if label.find('\n') < 0 - else label, - 'orientation': 'horizontal', - }, - cax=cax, - cmap=cmap, - ) - # scalebar - add_scalebar( - ax, - length=int( - (gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0]) - / (3000), - ), - multiplier=1000, - units='kilometer', - locale=locale, - fontproperties=fm.FontProperties(size=textsize), - ) - # north arrow - add_localised_north_arrow(ax, text=phrases['north arrow']) - # axis formatting - cax.xaxis.set_major_formatter(ticker.EngFormatter()) - cax.tick_params(labelsize=textsize) - cax.xaxis.label.set_size(textsize) - plt.tight_layout() - fig.savefig(path, dpi=dpi) - plt.close(fig) - - -def policy_rating( - range, - score, - cmap, - comparison=None, - width=fpdf2_mm_scale(70), - height=fpdf2_mm_scale(15), - label='Policies identified', - comparison_label='25 city median', - locale='en', - path='policy_rating_test.jpg', - dpi=300, -): - """ - Plot a score (policy rating) and optional comparison (e.g. 25 cities median score) on a colour bar. - - Applied in this context for policy presence and policy quality scores. 
- """ - textsize = 14 - fig, ax = plt.subplots(figsize=(width, height)) - fig.subplots_adjust(bottom=0) - cmap = cmap - norm = mpl.colors.Normalize(vmin=range[0], vmax=range[1]) - fig.colorbar( - mpl.cm.ScalarMappable(norm=norm, cmap=cmap), - cax=ax, - orientation='horizontal', - # shrink=0.9, pad=0, aspect=90 - ) - # Format Global ticks - if comparison is None: - ax.xaxis.set_ticks([]) - else: - ax.xaxis.set_major_locator(ticker.FixedLocator([comparison])) - # ax.set_xticklabels([comparison_label]) - ax.set_xticklabels(['']) - ax.tick_params(labelsize=textsize) - ax.plot( - comparison, - 0, - marker='v', - color='black', - markersize=9, - zorder=10, - clip_on=False, - ) - if comparison < 7: - for t in ax.get_yticklabels(): - t.set_horizontalalignment('left') - if comparison > 18: - for t in ax.get_yticklabels(): - t.set_horizontalalignment('right') - # Format City ticks - ax_city = ax.twiny() - ax_city.set_xlim(range) - ax_city.xaxis.set_major_locator(ticker.FixedLocator([score])) - ax_city.plot( - score, - 1, - marker='^', - color='black', - markersize=9, - zorder=10, - clip_on=False, - ) - sep = '' - # if comparison is not None and label=='': - ax_city.set_xticklabels( - [f"{sep}{str(score).rstrip('0').rstrip('.')}/{range[1]}{label}"], - ) - ax_city.tick_params(labelsize=textsize) - # return figure with final styling - xlabel = f"{comparison_label} ({fnum(comparison,'0.0',locale)})" - ax.set_xlabel( - xlabel, labelpad=0.5, fontsize=textsize, - ) - plt.tight_layout() - fig.savefig(path, dpi=dpi) - plt.close(fig) - - -def pdf_template_setup( - config, template='template_web', font=None, language='English', -): - """ - Takes a template xlsx sheet defining elements for use in fpdf2's FlexTemplate function. - - This is loosely based on the specification at https://pyfpdf.github.io/fpdf2/Templates.html - However, it has been modified to allow additional definitions which are parsed - by this function - - can define the page for which template elements are to be applied - - colours are specified using standard hexadecimal codes - Any blank cells are set to represent "None". - The function returns a dictionary of elements, indexed by page number strings. 
- """ - # read in elements - elements = pd.read_excel(config.configuration, sheet_name=template) - document_pages = elements.page.unique() - # Conditional formatting to help avoid inappropriate line breaks and gaps in Tamil and Thai - if language in ['Tamil', 'Thai']: - elements['align'] = elements['align'].replace('J', 'L') - elements.loc[ - (elements['type'] == 'T') & (elements['size'] < 12), 'size', - ] = ( - elements.loc[ - (elements['type'] == 'T') & (elements['size'] < 12), 'size', - ] - - 1 - ) - if font is not None: - elements.loc[elements.font == 'custom', 'font'] = font - elements = elements.to_dict(orient='records') - elements = [ - {k: v if not str(v) == 'nan' else None for k, v in x.items()} - for x in elements - ] - # Need to convert hexadecimal colours (eg FFFFFF is white) to - # decimal colours for the fpdf Template class to work - # We'll establish default hex colours for foreground and background - planes = {'foreground': '000000', 'background': 'FFFFFF'} - for i, element in enumerate(elements): - for plane in planes: - if elements[i][plane] is not None: - # this assumes a hexadecimal string without the 0x prefix - elements[i][plane] = int(elements[i][plane], 16) - else: - elements[i][plane] = int(planes[plane], 16) - pages = {} - for page in document_pages: - pages[f'{page}'] = [x for x in elements if x['page'] == page] - return pages - - -def format_pages(pages, phrases): - """Format pages with phrases.""" - for page in pages: - for i, item in enumerate(pages[page]): - if item['name'] in phrases: - try: - pages[page][i]['text'] = phrases[item['name']].format( - city=phrases['city_name'], - country=phrases['country_name'], - study_doi=phrases['study_doi'], - citation_series=phrases['citation_series'], - citation_doi=phrases['citation_doi'], - citation_population=phrases['citation_population'], - citation_boundaries=phrases['citation_boundaries'], - citation_features=phrases['citation_features'], - citation_colour=phrases['citation_colour'], - ) - except Exception: - pages[f'{page}'][i]['text'] = phrases[item['name']] - return pages - - -def prepare_phrases(config, city, language): - """Prepare dictionary for specific language translation given English phrase.""" - languages = pd.read_excel(config.configuration, sheet_name='languages') - phrases = json.loads(languages.set_index('name').to_json())[language] - city_details = pd.read_excel( - config.configuration, sheet_name='city_details', index_col='City', - ) - country_code = config.region['country_code'] - # set default English country code - if language == 'English' and country_code not in ['AU', 'GB', 'US']: - country_code = 'AU' - phrases['locale'] = f'{phrases["language_code"]}_{country_code}' - # extract English language variables - phrases['metadata_author'] = languages.loc[ - languages['name'] == 'title_author', 'English', - ].values[0] - phrases['metadata_title1'] = languages.loc[ - languages['name'] == 'title_series_line1', 'English', - ].values[0] - phrases['metadata_title2'] = languages.loc[ - languages['name'] == 'title_series_line2', 'English', - ].values[0] - phrases['country'] = languages.loc[ - languages['name'] == f'{city} - Country', 'English', - ].values[0] - # restrict to specific language - languages = languages.loc[ - languages['role'] == 'template', ['name', language], - ] - phrases['vernacular'] = languages.loc[ - languages['name'] == 'language', language, - ].values[0] - phrases['city_name'] = languages.loc[ - languages['name'] == city, language, - ].values[0] - phrases['country_name'] = 
languages.loc[ - languages['name'] == f'{city} - Country', language, - ].values[0] - phrases['city'] = config.region['name'] - phrases['study_doi'] = f'https://doi.org/{city_details["DOI"]["Study"]}' - phrases['city_doi'] = f'https://doi.org/{city_details["DOI"][city]}' - phrases['study_executive_names'] = city_details['Names']['Study'] - phrases['local_collaborators_names'] = city_details['Names'][city] - phrases['Image 1 file'] = city_details['Image 1 file'][city] - phrases['Image 2 file'] = city_details['Image 2 file'][city] - phrases['Image 1 credit'] = city_details['Image 1 credit'][city] - phrases['Image 2 credit'] = city_details['Image 2 credit'][city] - phrases['region_population_citation'] = config.region['population'][ - 'citation' - ] - phrases['region_urban_region_citation'] = config.region['urban_region'][ - 'citation' - ] - phrases['region_OpenStreetMap_citation'] = config.region['OpenStreetMap'][ - 'citation' - ] - # incoporating study citations - citation_json = json.loads(city_details['exceptions_json']['Study']) - # handle city-specific exceptions - city_exceptions = json.loads(city_details['exceptions_json'][city]) - if language in city_exceptions: - city_exceptions = json.loads( - city_exceptions[language].replace("'", '"'), - ) - for e in city_exceptions: - phrases[e] = city_exceptions[e].replace('|', '\n') - for citation in citation_json: - if citation != 'citation_doi' or 'citation_doi' not in phrases: - phrases[citation] = ( - citation_json[citation].replace('|', '\n').format(**phrases) - ) - phrases['citation_doi'] = phrases['citation_doi'].format(**phrases) - return phrases - - -def wrap_sentences(words, limit=50, delimiter=''): - """Wrap sentences if exceeding limit.""" - sentences = [] - sentence = '' - gap = len(delimiter) - for i, word in enumerate(words): - if i == 0: - sentence = word - continue - # combine word to sentence if under limit - if len(sentence) + gap + len(word) <= limit: - sentence = sentence + delimiter + word - else: - sentences.append(sentence) - sentence = word - # append the final word if not yet appended - if i == len(words) - 1: - sentences.append(sentence) - # finally, append sentence of all words if still below limit - if (i == len(words) - 1) and (sentences == []): - sentences.append(sentence) - return sentences - - -def prepare_pdf_fonts(pdf, config, language): - """Prepare PDF fonts.""" - fonts = pd.read_excel(config.configuration, sheet_name='fonts') - fonts = ( - fonts.loc[ - fonts['Language'].isin( - ['default', language.replace(' (Auto-translation)', '')], - ) - ] - .fillna('') - .drop_duplicates() - ) - for s in ['', 'B', 'I', 'BI']: - for langue in ['default', language]: - if ( - langue.replace(' (Auto-translation)', '') - in fonts.Language.unique() - ): - f = fonts.loc[ - ( - fonts['Language'] - == langue.replace(' (Auto-translation)', '') - ) - & (fonts['Style'] == s) - ] - if f'{f.Font.values[0]}{s}' not in pdf.fonts.keys(): - pdf.add_font( - f.Font.values[0], style=s, fname=f.File.values[0], - ) - - -def save_pdf_layout(pdf, folder, template, filename): - """Save a PDF report in template subfolder in specified location.""" - if not os.path.exists(folder): - os.mkdir(folder) - template_folder = f'{folder}/_{template} reports' - if not os.path.exists(template_folder): - os.mkdir(template_folder) - pdf.output(f'{template_folder}/{filename}') - return f'Scorecard generated ({template_folder}):\n{filename}\n' - - -def generate_scorecard( - config, - phrases, - indicators, - city_policy, - language='English', - template='web', - 
font=None, -): - """ - Format a PDF using the pyfpdf FPDF2 library, and drawing on definitions from a UTF-8 CSV file. - - Included in this function is the marking of a policy 'scorecard', with ticks, crosses, etc. - """ - locale = phrases['locale'] - # Set up PDF document template pages - pages = pdf_template_setup(config, 'template_web', font, language) - pages = format_pages(pages, phrases) - # initialise PDF - pdf = FPDF(orientation='portrait', format='A4', unit='mm') - # set up fonts - prepare_pdf_fonts(pdf, config, language) - pdf.set_author(phrases['metadata_author']) - pdf.set_title(f"{phrases['metadata_title1']} {phrases['metadata_title2']}") - pdf.set_auto_page_break(False) - if template == 'web': - pdf = pdf_for_web( - pdf, - pages, - config, - language, - locale, - phrases, - indicators, - city_policy, - ) - elif template == 'print': - pdf = pdf_for_print( - pdf, - pages, - config, - language, - locale, - phrases, - indicators, - city_policy, - ) - # Output report pdf - filename = f"{phrases['city_name']} - {phrases['title_series_line1'].replace(':','')} - GHSCIC 2022 - {phrases['vernacular']}.pdf" - if phrases['_export'] == 1: - capture_result = save_pdf_layout( - pdf, - folder=config.region['region_dir'], - template=template, - filename=filename, - ) - return capture_result - else: - return 'Skipped.' - - -def pdf_for_web( - pdf, pages, config, language, locale, phrases, indicators, city_policy, -): - """ - Generate a PDF based on a template for web distribution. - - This template includes reporting on both policy and spatial indicators. - """ - city = config.city - city_path = config.region['region_dir'] - figure_path = f'{city_path}/figures' - # Set up Cover page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['1']) - if os.path.exists( - f'{config.folder_path}/process/configuration/assets/{phrases["Image 1 file"]}', - ): - template[ - 'hero_image' - ] = f'{config.folder_path}/process/configuration/assets/{phrases["Image 1 file"]}' - template['hero_alt'] = '' - template['Image 1 credit'] = phrases['Image 1 credit'] - template.render() - # Set up next page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['2']) - template['citations'] = phrases['citations'] - template['study_executive_names'] = phrases['study_executive_names'] - template['local_collaborators'] = template['local_collaborators'].format( - title_city=phrases['title_city'], - ) - template['local_collaborators_names'] = phrases[ - 'local_collaborators_names' - ] - if phrases['translation_names'] is None: - template['translation'] = '' - template['translation_names'] = '' - template.render() - # Set up next page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['3']) - template[ - 'introduction' - ] = f"{phrases['series_intro']}\n\n{phrases['series_interpretation']}".format( - **phrases, - ) - ## Access profile plot - template['access_profile'] = f'{figure_path}/access_profile_{language}.jpg' - ## Walkability plot - template[ - 'all_cities_walkability' - ] = f'{figure_path}/all_cities_walkability_{language}.jpg' - template['walkability_above_median_pct'] = phrases[ - 'walkability_above_median_pct' - ].format( - _pct( - fnum( - indicators['report']['walkability'][ - 'walkability_above_median_pct' - ], - '0.0', - locale, - ), - locale, - ), - ) - ## Policy ratings - template[ - 'presence_rating' - ] = f'{figure_path}/policy_presence_rating_{language}.jpg' - template[ - 'quality_rating' - ] = f'{figure_path}/policy_checklist_rating_{language}.jpg' - template['city_header'] = 
phrases['city_name'] - ## City planning requirement presence (round 0.5 up to 1) - policy_indicators = {0: '✗', 0.5: '~', 1: '✓'} - for x in range(1, 7): - # check presence - template[f'policy_urban_text{x}_response'] = policy_indicators[ - city_policy['Presence'][x - 1] - ] - # format percentage units according to locale - for gdp in ['middle', 'upper']: - template[f'policy_urban_text{x}_{gdp}'] = _pct( - float(city_policy['Presence_gdp'].iloc[x - 1][gdp]), - locale, - length='short', - ) - ## Walkable neighbourhood policy checklist - for i, policy in enumerate(city_policy['Checklist'].index): - row = i + 1 - for j, item in enumerate([x for x in city_policy['Checklist'][i]]): - col = j + 1 - template[f"policy_{'Checklist'}_text{row}_response{col}"] = item - template.render() - # Set up next page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['4']) - ## Density plots - template[ - 'local_nh_population_density' - ] = f'{figure_path}/local_nh_population_density_{language}.jpg' - template[ - 'local_nh_intersection_density' - ] = f'{figure_path}/local_nh_intersection_density_{language}.jpg' - ## Density threshold captions - for scenario in indicators['report']['thresholds']: - template[scenario] = phrases[f'optimal_range - {scenario}'].format( - _pct( - fnum( - indicators['report']['thresholds'][scenario]['pct'], - '0.0', - locale, - ), - locale, - ), - fnum( - indicators['report']['thresholds'][scenario]['criteria'], - '#,000', - locale, - ), - phrases['density_units'], - ) - if os.path.exists( - f'{config.folder_path}/process/configuration/assets/{phrases["Image 2 file"]}', - ): - template[ - 'hero_image_2' - ] = f'{config.folder_path}/process/configuration/assets/{phrases["Image 2 file"]}' - template['hero_alt_2'] = '' - template['Image 2 credit'] = phrases['Image 2 credit'] - template.render() - # Set up next page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['5']) - template[ - 'pct_access_500m_pt.jpg' - ] = f'{figure_path}/pct_access_500m_pt_{language}.jpg' - template[ - 'pct_access_500m_public_open_space_large_score' - ] = f'{figure_path}/pct_access_500m_public_open_space_large_score_{language}.jpg' - template['city_text'] = phrases[f'{city} - Summary'] - ## Checklist ratings for PT and POS - for analysis in ['PT', 'POS']: - for i, policy in enumerate(city_policy[analysis].index): - row = i + 1 - for j, item in enumerate([x for x in city_policy[analysis][i]]): - col = j + 1 - template[f'policy_{analysis}_text{row}_response{col}'] = item - template.render() - # Set up last page - pdf.add_page() - template = FlexTemplate(pdf, elements=pages['6']) - template.render() - return pdf diff --git a/process/subprocesses/_utils.py b/process/subprocesses/_utils.py index bf3bc10a..e33da1db 100644 --- a/process/subprocesses/_utils.py +++ b/process/subprocesses/_utils.py @@ -1,42 +1,1335 @@ -""" -Utility functions. - -Purpose: These functions may be used in other scripts to undertake specific tasks. 
-Context: Liveability indicator calculation (general tools for data wrangling)
-
-"""
-
-
-def reproject_raster(inpath, outpath, new_crs):
-    import rasterio
-    from rasterio.warp import (
-        Resampling,
-        calculate_default_transform,
-        reproject,
-    )
-
-    dst_crs = new_crs  # CRS for web meractor
-    with rasterio.open(inpath) as src:
-        transform, width, height = calculate_default_transform(
-            src.crs, dst_crs, src.width, src.height, *src.bounds,
-        )
-        kwargs = src.meta.copy()
-        kwargs.update(
-            {
-                'crs': dst_crs,
-                'transform': transform,
-                'width': width,
-                'height': height,
-            },
-        )
-        with rasterio.open(outpath, 'w', **kwargs) as dst:
-            for i in range(1, src.count + 1):
-                reproject(
-                    source=rasterio.band(src, i),
-                    destination=rasterio.band(dst, i),
-                    src_transform=src.transform,
-                    src_crs=src.crs,
-                    dst_transform=transform,
-                    dst_crs=dst_crs,
-                    resampling=Resampling.nearest,
-                )
+"""
+Report functions.
+
+Define functions used for formatting and saving indicator reports.
+"""
+import json
+import os
+import subprocess as sp
+import time
+from textwrap import wrap
+
+import fiona
+import geopandas as gpd
+import matplotlib as mpl
+import matplotlib.font_manager as fm
+import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+import numpy as np
+import pandas as pd
+from babel.numbers import format_decimal as fnum
+from babel.units import format_unit
+from fpdf import FPDF, FlexTemplate
+from matplotlib.cm import ScalarMappable
+from matplotlib.lines import Line2D
+from mpl_toolkits.axes_grid1 import make_axes_locatable
+from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
+from mpl_toolkits.axes_grid1.inset_locator import inset_axes
+from subprocesses.batlow import batlow_map as cmap
+
+
+# 'pretty' text wrapping as per https://stackoverflow.com/questions/37572837/how-can-i-make-python-3s-print-fit-the-size-of-the-command-prompt
+def get_terminal_columns():
+    import shutil
+
+    return shutil.get_terminal_size().columns
+
+
+def print_autobreak(*args, sep=' '):
+    import textwrap
+
+    width = (
+        get_terminal_columns()
+    )  # Check size once to avoid rechecks per "paragraph"
+    # Convert all args to strings, join with separator, then split on any newlines,
+    # preserving line endings, so each "paragraph" wrapped separately
+    for line in sep.join(map(str, args)).splitlines(True):
+        # Py3's print function makes it easy to print textwrap.wrap's result as one-liner
+        print(*textwrap.wrap(line, width), sep='\n')
+
+
+def wrap_autobreak(*args, sep=' '):
+    import textwrap
+
+    width = get_terminal_columns()  # Check size once to avoid rechecks per "paragraph"
+    # Wrap each newline-delimited "paragraph" separately, then re-join the wrapped lines
+    return '\n'.join(
+        '\n'.join(textwrap.wrap(line, width))
+        for line in sep.join(map(str, args)).splitlines(True)
+    )
+
+
+def generate_metadata_yml(
+    engine,
+    folder_path,
+    region_config,
+    codename,
+    name,
+    year,
+    authors,
+    url,
+    individualname,
+    positionname,
+    email,
+):
+    """Generate YAML metadata control file."""
+    sql = """SELECT ST_Extent(ST_Transform(geom,4326)) FROM urban_study_region;"""
+    from sqlalchemy import text
+
+    with engine.begin() as connection:
+        bbox = (
+            connection.execute(text(sql))
+            .fetchone()[0]
+            .replace(' ', ',')
+            .replace('(', '[')
+            .replace(')', ']')[3:]
+        )
+
+    yml = f'{folder_path}/process/configuration/assets/metadata_template.yml'
+    region_dir =
region_config['region_dir'] + datestamp = time.strftime('%Y-%m-%d') + dateyear = time.strftime('%Y') + spatial_bbox = bbox + spatial_crs = 'WGS84' + + with open(yml) as f: + metadata = f.read() + + metadata = metadata.format(**locals()) + metadata = ( + f'# {name} ({codename})\n' + f'# YAML metadata control file (MCF) template for pygeometa\n{metadata}' + ) + metadata_yml = f'{region_dir}/{codename}_metadata.yml' + with open(metadata_yml, 'w') as f: + f.write(metadata) + return os.path.basename(metadata_yml) + + +def generate_metadata_xml(region_dir, codename): + """Generate xml metadata given a yml metadata control file as per the specification required by pygeometa.""" + yml_in = f'{region_dir}/{codename}_metadata.yml' + xml_out = f'{region_dir}/{codename}_metadata.xml' + command = f'pygeometa metadata generate "{yml_in}" --output "{xml_out}" --schema iso19139-2' + sp.call(command, shell=True) + return os.path.basename(xml_out) + + +def postgis_to_csv(file, db_host, db_user, db, db_pwd, table): + """Export table from PostGIS database to CSV.""" + command = ( + f'ogr2ogr -f "CSV" {file} ' + f'PG:"host={db_host} user={db_user} dbname={db} password={db_pwd}" ' + f' {table} ' + ) + sp.call(command, shell=True) + return file + + +def postgis_to_geopackage(gpkg, db_host, db_user, db, db_pwd, tables): + """Export selection of tables from PostGIS database to geopackage.""" + try: + os.remove(gpkg) + except FileNotFoundError: + pass + + for table in tables: + print(f' - {table}') + command = ( + f'ogr2ogr -update -overwrite -lco overwrite=yes -f GPKG {gpkg} ' + f'PG:"host={db_host} user={db_user} dbname={db} password={db_pwd}" ' + f' {table} ' + ) + sp.call(command, shell=True) + + +def get_and_setup_language_cities(config): + """Setup and return languages for given configuration.""" + if config.auto_language: + languages = pd.read_excel(config.configuration, sheet_name='languages') + languages = languages[languages['name'] == config.city].dropna( + axis=1, how='all', + ) + languages = list(languages.columns[2:]) + else: + languages = [config.language] + return languages + + +def generate_report_for_language( + config, language, indicators, policies, +): + """Generate report for a processed city in a given language.""" + city = config.city + font = get_and_setup_font(language, config) + # set up policies + city_policy = policy_data_setup(policies, config.region['policy_review']) + # get city and grid summary data + gpkg = config.region['gpkg'] + if not os.path.isfile(gpkg): + raise Exception( + f'\n\nRequired file {gpkg} could not be located. To proceed with report generation, please ensure that analysis for this region, including export of geopackage, has been completed successfully.\n\n', + ) + layers = fiona.listlayers(gpkg) + if not any([x.startswith(city) for x in layers]): + raise Exception( + f'\n\nResult summary layers for grid and city could not be located in the geopackage {gpkg}. To proceed with report generation, please ensure that analysis for this region has been completed successfully.\n\n', + ) + gdfs = {} + for gdf in ['city', 'grid']: + gdfs[gdf] = gpd.read_file( + gpkg, + layer=[ + layer + for layer in layers + if layer.startswith( + config.region[f'{gdf}_summary'].strip( + time.strftime('%Y-%m-%d'), + ), + ) + ][0], + ) + # The below currently relates walkability to specified reference + # (e.g. 
the GHSCIC 25 city median, following standardisation using + # 25-city mean and standard deviation for sub-indicators) + gdfs['grid'] = evaluate_comparative_walkability( + gdfs['grid'], indicators['report']['walkability']['ghscic_reference'], + ) + indicators['report']['walkability'][ + 'walkability_above_median_pct' + ] = evaluate_threshold_pct( + gdfs['grid'], + 'all_cities_walkability', + '>', + indicators['report']['walkability']['ghscic_walkability_reference'], + ) + for i in indicators['report']['thresholds']: + indicators['report']['thresholds'][i]['pct'] = evaluate_threshold_pct( + gdfs['grid'], + indicators['report']['thresholds'][i]['field'], + indicators['report']['thresholds'][i]['relationship'], + indicators['report']['thresholds'][i]['criteria'], + ) + # set up phrases + phrases = prepare_phrases(config, city, language) + # Generate resources + if config.generate_resources: + print(f'\nFigures and maps ({language})') + capture_return = generate_resources( + config, + gdfs['city'], + gdfs['grid'], + phrases, + indicators, + city_policy, + language, + cmap, + ) + # instantiate template + for template in config.templates: + print(f'\nReport ({template} PDF template; {language})') + capture_return = generate_scorecard( + config, phrases, indicators, city_policy, language, template, font, + ) + print(capture_return) + + +def get_and_setup_font(language, config): + """Setup and return font for given language configuration.""" + fonts = pd.read_excel(config.configuration, sheet_name='fonts') + if language.replace(' (Auto-translation)', '') in fonts.Language.unique(): + fonts = fonts.loc[ + fonts['Language'] == language.replace(' (Auto-translation)', '') + ].fillna('') + else: + fonts = fonts.loc[fonts['Language'] == 'default'].fillna('') + main_font = fonts.File.values[0].strip() + fm.fontManager.addfont(main_font) + prop = fm.FontProperties(fname=main_font) + fm.findfont(prop=prop, directory=main_font, rebuild_if_missing=True) + plt.rcParams['font.family'] = prop.get_name() + font = fonts.Font.values[0] + return font + + +def policy_data_setup(policies, policy_review): + """Returns a dictionary of policy data.""" + review = pd.read_excel(policy_review, index_col=0) + df_policy = {} + # Presence score + df_policy['Presence_rating'] = review.loc['Score']['Policy identified'] + # Quality score + df_policy['Checklist_rating'] = review.loc['Score']['Quality'] + # Presence + df_policy['Presence'] = review.loc[ + [p['Policy'] for p in policies if p['Display'] == 'Presence'] + ].apply(lambda x: x['Weight'] * x['Policy identified'], axis=1) + # GDP + df_policy['Presence_gdp'] = pd.DataFrame( + [ + { + c: p[c] + for c in p + if c + in ['Label', 'gdp_comparison_middle', 'gdp_comparison_upper'] + } + for p in policies + if p['Display'] == 'Presence' + ], + ) + df_policy['Presence_gdp'].columns = ['Policy', 'middle', 'upper'] + df_policy['Presence_gdp'].set_index('Policy', inplace=True) + # Urban Checklist + df_policy['Checklist'] = review.loc[ + [p['Policy'] for p in policies if p['Display'] == 'Checklist'] + ]['Checklist'] + # Public open space checklist + df_policy['POS'] = review.loc[ + [p['Policy'] for p in policies if p['Display'] == 'POS'] + ]['Checklist'] + # Public transport checklist + df_policy['PT'] = review.loc[ + [p['Policy'] for p in policies if p['Display'] == 'PT'] + ]['Checklist'] + return df_policy + + +def evaluate_comparative_walkability(gdf_grid, reference): + """Evaluate walkability relative to 25-city study reference.""" + for x in reference: + gdf_grid[f'z_{x}'] = 
(gdf_grid[x] - reference[x]['mean']) / reference[x]['sd']
+    gdf_grid['all_cities_walkability'] = sum(
+        [gdf_grid[f'z_{x}'] for x in reference],
+    )
+    return gdf_grid
+
+
+def evaluate_threshold_pct(
+    gdf_grid, indicator, relationship, reference, population='pop_est',
+):
+    """Evaluate whether a pandas series meets a threshold criterion (e.g. '<' or '>')."""
+    percentage = round(
+        100
+        * gdf_grid.query(f'{indicator} {relationship} {reference}')[
+            population
+        ].sum()
+        / gdf_grid[population].sum(),
+        1,
+    )
+    return percentage
+
+
+def generate_resources(
+    config,
+    gdf_city,
+    gdf_grid,
+    phrases,
+    indicators,
+    city_policy,
+    language,
+    cmap,
+):
+    """
+    Prepare the series of image resources required for the global indicator scorecards.
+
+    The figure_path string variable is returned, indicating where generated resources are stored upon successful execution.
+    """
+    figure_path = f'{config.region["region_dir"]}/figures'
+    locale = phrases['locale']
+    city_stats = compile_city_stats(gdf_city, indicators, phrases)
+    if not os.path.exists(figure_path):
+        os.mkdir(figure_path)
+    # Spatial access liveability profile
+    li_profile(
+        city_stats=city_stats,
+        title=phrases['Population % with access within 500m to...'],
+        cmap=cmap,
+        phrases=phrases,
+        path=f'{figure_path}/access_profile_{language}.jpg',
+    )
+    print(f' figures/access_profile_{language}.jpg')
+    ## constrain extreme outlying walkability for representation
+    gdf_grid['all_cities_walkability'] = gdf_grid[
+        'all_cities_walkability'
+    ].apply(lambda x: -6 if x < -6 else (6 if x > 6 else x))
+    # Spatial distribution maps
+    spatial_maps = compile_spatial_map_info(
+        indicators['report']['spatial_distribution_figures'],
+        gdf_city,
+        phrases,
+        locale,
+        language=language,
+    )
+    for f in spatial_maps:
+        spatial_dist_map(
+            gdf_grid,
+            column=f,
+            range=spatial_maps[f]['range'],
+            label=spatial_maps[f]['label'],
+            tick_labels=spatial_maps[f]['tick_labels'],
+            cmap=cmap,
+            path=f'{figure_path}/{spatial_maps[f]["outfile"]}',
+            phrases=phrases,
+            locale=locale,
+        )
+        print(f" figures/{spatial_maps[f]['outfile']}")
+    # Threshold maps
+    for scenario in indicators['report']['thresholds']:
+        threshold_map(
+            gdf_grid,
+            column=indicators['report']['thresholds'][scenario]['field'],
+            scale=indicators['report']['thresholds'][scenario]['scale'],
+            comparison=indicators['report']['thresholds'][scenario][
+                'criteria'
+            ],
+            label=(
+                f"{phrases[indicators['report']['thresholds'][scenario]['title']]} ({phrases['density_units']})"
+            ),
+            cmap=cmap,
+            path=f"{figure_path}/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg",
+            phrases=phrases,
+            locale=locale,
+        )
+        print(
+            f" figures/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg",
+        )
+    # Policy ratings
+    policy_rating(
+        range=[0, 24],
+        score=city_policy['Presence_rating'],
+        comparison=indicators['report']['policy']['comparisons']['presence'],
+        label='',
+        comparison_label=phrases['25 city comparison'],
+        cmap=cmap,
+        locale=locale,
+        path=f'{figure_path}/policy_presence_rating_{language}.jpg',
+    )
+    print(f' figures/policy_presence_rating_{language}.jpg')
+    policy_rating(
+        range=[0, 57],
+        score=city_policy['Checklist_rating'],
+        comparison=indicators['report']['policy']['comparisons']['quality'],
+        label='',
+        comparison_label=phrases['25 city comparison'],
+        cmap=cmap,
+        locale=locale,
+        path=f'{figure_path}/policy_checklist_rating_{language}.jpg',
+    )
+    print(f' figures/policy_checklist_rating_{language}.jpg')
+    return figure_path
+
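(Editorial aside, not part of the patch.) The threshold helper added above reduces each density criterion to a population-weighted share of grid cells meeting it. A minimal sketch of the intended behaviour, using a plain pandas frame with hypothetical values:

    import pandas as pd

    toy_grid = pd.DataFrame(
        {
            'local_nh_population_density': [35, 80, 120],  # hypothetical cell densities
            'pop_est': [1000, 2000, 3000],  # hypothetical cell population estimates
        },
    )
    # Two of the three cells meet the criterion, so the population-weighted share is
    # 100 * (2000 + 3000) / 6000, rounded to one decimal place: 83.3
    pct = evaluate_threshold_pct(toy_grid, 'local_nh_population_density', '>=', 66)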
+ +def fpdf2_mm_scale(mm): + """Returns a width double that of the conversion of mm to inches. + + This has been found, via trial and error, to be useful when preparing images for display in generated PDFs using fpdf2. + """ + return 2 * mm / 25.4 + + +def _pct(value, locale, length='short'): + """Formats a percentage sign according to a given locale.""" + return format_unit(value, 'percent', locale=locale, length=length) + + +def compile_city_stats(gdf_city, indicators, phrases): + """Compile a set of city statistics with comparisons, given a processed geodataframe of city summary statistics and a dictionary of indicators including reference percentiles.""" + city_stats = {} + city_stats['access'] = gdf_city[ + indicators['report']['accessibility'].keys() + ].transpose()[0] + city_stats['access'].index = [ + indicators['report']['accessibility'][x]['title'] + if city_stats['access'][x] is not None + else f"{indicators['report']['accessibility'][x]['title']} (not evaluated)" + for x in city_stats['access'].index + ] + city_stats['access'] = city_stats['access'].fillna( + 0, + ) # for display purposes + city_stats['comparisons'] = { + indicators['report']['accessibility'][x]['title']: indicators[ + 'report' + ]['accessibility'][x]['ghscic_reference'] + for x in indicators['report']['accessibility'] + } + city_stats['percentiles'] = {} + for percentile in ['p25', 'p50', 'p75']: + city_stats['percentiles'][percentile] = [ + city_stats['comparisons'][x][percentile] + for x in city_stats['comparisons'].keys() + ] + city_stats['access'].index = [ + phrases[x] for x in city_stats['access'].index + ] + return city_stats + + +def compile_spatial_map_info( + spatial_distribution_figures, gdf_city, phrases, locale, language, +): + """ + Compile required information to produce spatial distribution figures. + + This is done using the information recorded in configuration/indicators.yml; specifically, indicators['report']['spatial_distribution_figures'] + """ + # effectively deep copy the supplied dictionary so its not mutable + spatial_maps = json.loads(json.dumps(spatial_distribution_figures)) + for i in spatial_maps: + for text in ['label', 'outfile']: + spatial_maps[i][text] = spatial_maps[i][text].format(**locals()) + if spatial_maps[i]['tick_labels'] is not None: + spatial_maps[i]['tick_labels'] = [ + x.format(**{'phrases': phrases}) + for x in spatial_maps[i]['tick_labels'] + ] + if i.startswith('pct_'): + city_summary_percent = _pct( + fnum(gdf_city[f'pop_{i}'].fillna(0)[0], '0.0', locale), locale, + ) + spatial_maps[i][ + 'label' + ] = f'{spatial_maps[i]["label"]} ({city_summary_percent})' + if gdf_city['pop_pct_access_500m_pt_gtfs_freq_20_score'][ + 0 + ] is None or pd.isna( + gdf_city['pop_pct_access_500m_pt_gtfs_freq_20_score'][0], + ): + spatial_maps['pct_access_500m_pt_any_score'] = spatial_maps.pop( + 'pct_access_500m_pt_gtfs_freq_20_score', + ) + spatial_maps['pct_access_500m_pt_any_score']['label'] = ( + f'{phrases["Percentage of population with access to public transport"]}\n' + f'({_pct(fnum(gdf_city["pop_pct_access_500m_pt_any_score"][0],"0.0",locale),locale)})' + ) + return spatial_maps + + +def add_scalebar( + ax, + length, + multiplier, + units, + fontproperties, + loc='upper left', + pad=0, + color='black', + frameon=False, + size_vertical=2, + locale='en', +): + """ + Adds a scalebar to matplotlib map. + + Requires import of: from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar + As a rule of thumb, a scalebar of 1/3 of feature size seems appropriate. 
+ For example, to achieve this, calculate the variable 'length' as + + gdf_width = gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0] + scalebar_length = int(gdf_width / (3000)) + """ + scalebar = AnchoredSizeBar( + ax.transData, + length * multiplier, + format_unit(length, units, locale=locale, length='short'), + loc=loc, + pad=pad, + color=color, + frameon=frameon, + size_vertical=size_vertical, + fontproperties=fontproperties, + ) + ax.add_artist(scalebar) + + +def add_localised_north_arrow( + ax, + text='N', + xy=(1, 0.96), + textsize=14, + arrowprops=dict(facecolor='black', width=4, headwidth=8), +): + """ + Add a minimal north arrow with custom text label above it to a matplotlib map. + + This can be used to add, for example, 'N' or other language equivalent. Default placement is in upper right corner of map. + """ + arrow = ax.annotate( + '', + xy=(1, 0.96), + xycoords=ax.transAxes, + xytext=(0, -0.5), + textcoords='offset pixels', + va='center', + ha='center', + arrowprops=arrowprops, + ) + ax.annotate( + text, + xy=(0.5, 1.5), + xycoords=arrow, + va='center', + ha='center', + fontsize=textsize, + ) + + +## radar chart +def li_profile( + city_stats, + title, + cmap, + path, + phrases, + width=fpdf2_mm_scale(80), + height=fpdf2_mm_scale(80), + dpi=300, +): + """ + Generates a radar chart for city liveability profiles. + + Expands on https://www.python-graph-gallery.com/web-circular-barplot-with-matplotlib + -- A python code blog post by Yan Holtz, in turn expanding on work of Tomás Capretto and Tobias Stadler. + """ + figsize = (width, height) + # Values for the x axis + ANGLES = np.linspace( + 0.15, 2 * np.pi - 0.05, len(city_stats['access']), endpoint=False, + ) + VALUES = city_stats['access'].values + COMPARISON = city_stats['percentiles']['p50'] + INDICATORS = city_stats['access'].index + # Colours + GREY12 = '#1f1f1f' + norm = mpl.colors.Normalize(vmin=0, vmax=100) + COLORS = cmap(list(norm(VALUES))) + # Initialize layout in polar coordinates + textsize = 11 + fig, ax = plt.subplots(figsize=figsize, subplot_kw={'projection': 'polar'}) + # Set background color to white, both axis and figure. 
+ fig.patch.set_facecolor('white') + ax.set_facecolor('white') + ax.set_theta_offset(1.2 * np.pi / 2) + ax.set_ylim(-50, 125) + # Add geometries to the plot ------------------------------------- + # Add bars to represent the cumulative track lengths + ax.bar(ANGLES, VALUES, color=COLORS, alpha=0.9, width=0.52, zorder=10) + # Add interquartile comparison reference lines + ax.vlines( + ANGLES, + city_stats['percentiles']['p25'], + city_stats['percentiles']['p75'], + color=GREY12, + zorder=11, + ) + # Add dots to represent the mean gain + comparison_text = '\n'.join( + wrap(phrases['25 city comparison'], 17, break_long_words=False), + ) + ax.scatter( + ANGLES, + COMPARISON, + s=60, + color=GREY12, + zorder=11, + label=comparison_text, + ) + # Add labels for the indicators + try: + LABELS = [ + '\n'.join(wrap(r, 12, break_long_words=False)) for r in INDICATORS + ] + except Exception: + LABELS = INDICATORS + # Set the labels + ax.set_xticks(ANGLES) + ax.set_xticklabels(LABELS, size=textsize) + # Remove lines for polar axis (x) + ax.xaxis.grid(False) + # Put grid lines for radial axis (y) at 0, 1000, 2000, and 3000 + ax.set_yticklabels([]) + ax.set_yticks([0, 25, 50, 75, 100]) + # Remove spines + ax.spines['start'].set_color('none') + ax.spines['polar'].set_color('none') + # Adjust padding of the x axis labels ---------------------------- + # This is going to add extra space around the labels for the + # ticks of the x axis. + XTICKS = ax.xaxis.get_major_ticks() + for tick in XTICKS: + tick.set_pad(10) + # Add custom annotations ----------------------------------------- + # The following represent the heights in the values of the y axis + PAD = 0 + for num in [0, 50, 100]: + ax.text( + -0.2 * np.pi / 2, + num + PAD, + f'{num}%', + ha='center', + va='center', + backgroundcolor='white', + size=textsize, + ) + # Add text to explain the meaning of the height of the bar and the + # height of the dot + ax.text( + ANGLES[0], + -50, + '\n'.join(wrap(title, 13, break_long_words=False)), + rotation=0, + ha='center', + va='center', + size=textsize, + zorder=12, + ) + angle = np.deg2rad(130) + ax.legend( + loc='lower right', + bbox_to_anchor=(0.58 + np.cos(angle) / 2, 0.46 + np.sin(angle) / 2), + ) + fig.savefig(path, dpi=dpi) + plt.close(fig) + + +## Spatial distribution mapping +def spatial_dist_map( + gdf, + column, + range, + label, + tick_labels, + cmap, + path, + width=fpdf2_mm_scale(88), + height=fpdf2_mm_scale(80), + dpi=300, + phrases={'north arrow': 'N', 'km': 'km'}, + locale='en', +): + """Spatial distribution maps using geopandas geodataframe.""" + figsize = (width, height) + textsize = 14 + fig, ax = plt.subplots(figsize=figsize) + ax.set_axis_off() + divider = make_axes_locatable(ax) # Define 'divider' for the axes + # Legend axes will be located at the 'bottom' of figure, with width '5%' of ax and + # a padding between them equal to '0.1' inches + cax = divider.append_axes('bottom', size='5%', pad=0.1) + gdf.plot( + column=column, + ax=ax, + legend=True, + vmin=range[0], + vmax=range[1], + legend_kwds={ + 'label': '\n'.join(wrap(label, 60, break_long_words=False)) + if label.find('\n') < 0 + else label, + 'orientation': 'horizontal', + }, + cax=cax, + cmap=cmap, + ) + # scalebar + add_scalebar( + ax, + length=int( + (gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0]) + / (3000), + ), + multiplier=1000, + units='kilometer', + locale=locale, + fontproperties=fm.FontProperties(size=textsize), + ) + # north arrow + add_localised_north_arrow(ax, text=phrases['north arrow']) + # axis 
formatting + cax.tick_params(labelsize=textsize) + cax.xaxis.label.set_size(textsize) + if tick_labels is not None: + # cax.set_xticks(cax.get_xticks().tolist()) + # cax.set_xticklabels(tick_labels) + cax.xaxis.set_major_locator(ticker.MaxNLocator(len(tick_labels))) + ticks_loc = cax.get_xticks().tolist() + cax.xaxis.set_major_locator(ticker.FixedLocator(ticks_loc)) + cax.set_xticklabels(tick_labels) + plt.tight_layout() + fig.savefig(path, dpi=dpi) + plt.close(fig) + + +def threshold_map( + gdf, + column, + comparison, + scale, + label, + cmap, + path, + width=fpdf2_mm_scale(88), + height=fpdf2_mm_scale(80), + dpi=300, + phrases={'north arrow': 'N', 'km': 'km'}, + locale='en', +): + """Create threshold indicator map.""" + figsize = (width, height) + textsize = 14 + fig, ax = plt.subplots(figsize=figsize) + ax.set_axis_off() + divider = make_axes_locatable(ax) # Define 'divider' for the axes + # Legend axes will be located at the 'bottom' of figure, with width '5%' of ax and + # a padding between them equal to '0.1' inches + cax = divider.append_axes('bottom', size='5%', pad=0.1) + gdf.plot( + column=column, + ax=ax, + legend=True, + legend_kwds={ + 'label': '\n'.join(wrap(label, 60, break_long_words=False)) + if label.find('\n') < 0 + else label, + 'orientation': 'horizontal', + }, + cax=cax, + cmap=cmap, + ) + # scalebar + add_scalebar( + ax, + length=int( + (gdf.geometry.total_bounds[2] - gdf.geometry.total_bounds[0]) + / (3000), + ), + multiplier=1000, + units='kilometer', + locale=locale, + fontproperties=fm.FontProperties(size=textsize), + ) + # north arrow + add_localised_north_arrow(ax, text=phrases['north arrow']) + # axis formatting + cax.xaxis.set_major_formatter(ticker.EngFormatter()) + cax.tick_params(labelsize=textsize) + cax.xaxis.label.set_size(textsize) + plt.tight_layout() + fig.savefig(path, dpi=dpi) + plt.close(fig) + + +def policy_rating( + range, + score, + cmap, + comparison=None, + width=fpdf2_mm_scale(70), + height=fpdf2_mm_scale(15), + label='Policies identified', + comparison_label='25 city median', + locale='en', + path='policy_rating_test.jpg', + dpi=300, +): + """ + Plot a score (policy rating) and optional comparison (e.g. 25 cities median score) on a colour bar. + + Applied in this context for policy presence and policy quality scores. 
+ """ + textsize = 14 + fig, ax = plt.subplots(figsize=(width, height)) + fig.subplots_adjust(bottom=0) + cmap = cmap + norm = mpl.colors.Normalize(vmin=range[0], vmax=range[1]) + fig.colorbar( + mpl.cm.ScalarMappable(norm=norm, cmap=cmap), + cax=ax, + orientation='horizontal', + # shrink=0.9, pad=0, aspect=90 + ) + # Format Global ticks + if comparison is None: + ax.xaxis.set_ticks([]) + else: + ax.xaxis.set_major_locator(ticker.FixedLocator([comparison])) + # ax.set_xticklabels([comparison_label]) + ax.set_xticklabels(['']) + ax.tick_params(labelsize=textsize) + ax.plot( + comparison, + 0, + marker='v', + color='black', + markersize=9, + zorder=10, + clip_on=False, + ) + if comparison < 7: + for t in ax.get_yticklabels(): + t.set_horizontalalignment('left') + if comparison > 18: + for t in ax.get_yticklabels(): + t.set_horizontalalignment('right') + # Format City ticks + ax_city = ax.twiny() + ax_city.set_xlim(range) + ax_city.xaxis.set_major_locator(ticker.FixedLocator([score])) + ax_city.plot( + score, + 1, + marker='^', + color='black', + markersize=9, + zorder=10, + clip_on=False, + ) + sep = '' + # if comparison is not None and label=='': + ax_city.set_xticklabels( + [f"{sep}{str(score).rstrip('0').rstrip('.')}/{range[1]}{label}"], + ) + ax_city.tick_params(labelsize=textsize) + # return figure with final styling + xlabel = f"{comparison_label} ({fnum(comparison,'0.0',locale)})" + ax.set_xlabel( + xlabel, labelpad=0.5, fontsize=textsize, + ) + plt.tight_layout() + fig.savefig(path, dpi=dpi) + plt.close(fig) + + +def pdf_template_setup( + config, template='template_web', font=None, language='English', +): + """ + Takes a template xlsx sheet defining elements for use in fpdf2's FlexTemplate function. + + This is loosely based on the specification at https://pyfpdf.github.io/fpdf2/Templates.html + However, it has been modified to allow additional definitions which are parsed + by this function + - can define the page for which template elements are to be applied + - colours are specified using standard hexadecimal codes + Any blank cells are set to represent "None". + The function returns a dictionary of elements, indexed by page number strings. 
+ """ + # read in elements + elements = pd.read_excel(config.configuration, sheet_name=template) + document_pages = elements.page.unique() + # Conditional formatting to help avoid inappropriate line breaks and gaps in Tamil and Thai + if language in ['Tamil', 'Thai']: + elements['align'] = elements['align'].replace('J', 'L') + elements.loc[ + (elements['type'] == 'T') & (elements['size'] < 12), 'size', + ] = ( + elements.loc[ + (elements['type'] == 'T') & (elements['size'] < 12), 'size', + ] + - 1 + ) + if font is not None: + elements.loc[elements.font == 'custom', 'font'] = font + elements = elements.to_dict(orient='records') + elements = [ + {k: v if not str(v) == 'nan' else None for k, v in x.items()} + for x in elements + ] + # Need to convert hexadecimal colours (eg FFFFFF is white) to + # decimal colours for the fpdf Template class to work + # We'll establish default hex colours for foreground and background + planes = {'foreground': '000000', 'background': 'FFFFFF'} + for i, element in enumerate(elements): + for plane in planes: + if elements[i][plane] is not None: + # this assumes a hexadecimal string without the 0x prefix + elements[i][plane] = int(elements[i][plane], 16) + else: + elements[i][plane] = int(planes[plane], 16) + pages = {} + for page in document_pages: + pages[f'{page}'] = [x for x in elements if x['page'] == page] + return pages + + +def format_pages(pages, phrases): + """Format pages with phrases.""" + for page in pages: + for i, item in enumerate(pages[page]): + if item['name'] in phrases: + try: + pages[page][i]['text'] = phrases[item['name']].format( + city=phrases['city_name'], + country=phrases['country_name'], + study_doi=phrases['study_doi'], + citation_series=phrases['citation_series'], + citation_doi=phrases['citation_doi'], + citation_population=phrases['citation_population'], + citation_boundaries=phrases['citation_boundaries'], + citation_features=phrases['citation_features'], + citation_colour=phrases['citation_colour'], + ) + except Exception: + pages[f'{page}'][i]['text'] = phrases[item['name']] + return pages + + +def prepare_phrases(config, city, language): + """Prepare dictionary for specific language translation given English phrase.""" + languages = pd.read_excel(config.configuration, sheet_name='languages') + phrases = json.loads(languages.set_index('name').to_json())[language] + city_details = pd.read_excel( + config.configuration, sheet_name='city_details', index_col='City', + ) + country_code = config.region['country_code'] + # set default English country code + if language == 'English' and country_code not in ['AU', 'GB', 'US']: + country_code = 'AU' + phrases['locale'] = f'{phrases["language_code"]}_{country_code}' + # extract English language variables + phrases['metadata_author'] = languages.loc[ + languages['name'] == 'title_author', 'English', + ].values[0] + phrases['metadata_title1'] = languages.loc[ + languages['name'] == 'title_series_line1', 'English', + ].values[0] + phrases['metadata_title2'] = languages.loc[ + languages['name'] == 'title_series_line2', 'English', + ].values[0] + phrases['country'] = languages.loc[ + languages['name'] == f'{city} - Country', 'English', + ].values[0] + # restrict to specific language + languages = languages.loc[ + languages['role'] == 'template', ['name', language], + ] + phrases['vernacular'] = languages.loc[ + languages['name'] == 'language', language, + ].values[0] + phrases['city_name'] = languages.loc[ + languages['name'] == city, language, + ].values[0] + phrases['country_name'] = 
languages.loc[ + languages['name'] == f'{city} - Country', language, + ].values[0] + phrases['city'] = config.region['name'] + phrases['study_doi'] = f'https://doi.org/{city_details["DOI"]["Study"]}' + phrases['city_doi'] = f'https://doi.org/{city_details["DOI"][city]}' + phrases['study_executive_names'] = city_details['Names']['Study'] + phrases['local_collaborators_names'] = city_details['Names'][city] + phrases['Image 1 file'] = city_details['Image 1 file'][city] + phrases['Image 2 file'] = city_details['Image 2 file'][city] + phrases['Image 1 credit'] = city_details['Image 1 credit'][city] + phrases['Image 2 credit'] = city_details['Image 2 credit'][city] + phrases['region_population_citation'] = config.region['population'][ + 'citation' + ] + phrases['region_urban_region_citation'] = config.region['urban_region'][ + 'citation' + ] + phrases['region_OpenStreetMap_citation'] = config.region['OpenStreetMap'][ + 'citation' + ] + # incoporating study citations + citation_json = json.loads(city_details['exceptions_json']['Study']) + # handle city-specific exceptions + city_exceptions = json.loads(city_details['exceptions_json'][city]) + if language in city_exceptions: + city_exceptions = json.loads( + city_exceptions[language].replace("'", '"'), + ) + for e in city_exceptions: + phrases[e] = city_exceptions[e].replace('|', '\n') + for citation in citation_json: + if citation != 'citation_doi' or 'citation_doi' not in phrases: + phrases[citation] = ( + citation_json[citation].replace('|', '\n').format(**phrases) + ) + phrases['citation_doi'] = phrases['citation_doi'].format(**phrases) + return phrases + + +def wrap_sentences(words, limit=50, delimiter=''): + """Wrap sentences if exceeding limit.""" + sentences = [] + sentence = '' + gap = len(delimiter) + for i, word in enumerate(words): + if i == 0: + sentence = word + continue + # combine word to sentence if under limit + if len(sentence) + gap + len(word) <= limit: + sentence = sentence + delimiter + word + else: + sentences.append(sentence) + sentence = word + # append the final word if not yet appended + if i == len(words) - 1: + sentences.append(sentence) + # finally, append sentence of all words if still below limit + if (i == len(words) - 1) and (sentences == []): + sentences.append(sentence) + return sentences + + +def prepare_pdf_fonts(pdf, config, language): + """Prepare PDF fonts.""" + fonts = pd.read_excel(config.configuration, sheet_name='fonts') + fonts = ( + fonts.loc[ + fonts['Language'].isin( + ['default', language.replace(' (Auto-translation)', '')], + ) + ] + .fillna('') + .drop_duplicates() + ) + for s in ['', 'B', 'I', 'BI']: + for langue in ['default', language]: + if ( + langue.replace(' (Auto-translation)', '') + in fonts.Language.unique() + ): + f = fonts.loc[ + ( + fonts['Language'] + == langue.replace(' (Auto-translation)', '') + ) + & (fonts['Style'] == s) + ] + if f'{f.Font.values[0]}{s}' not in pdf.fonts.keys(): + pdf.add_font( + f.Font.values[0], style=s, fname=f.File.values[0], + ) + + +def save_pdf_layout(pdf, folder, template, filename): + """Save a PDF report in template subfolder in specified location.""" + if not os.path.exists(folder): + os.mkdir(folder) + template_folder = f'{folder}/_{template} reports' + if not os.path.exists(template_folder): + os.mkdir(template_folder) + pdf.output(f'{template_folder}/{filename}') + return f' _{template} reports/{filename}'.replace('/home/ghsci/work/', '') + + +def generate_scorecard( + config, + phrases, + indicators, + city_policy, + language='English', + 
template='web', + font=None, +): + """ + Format a PDF using the pyfpdf FPDF2 library, and drawing on definitions from a UTF-8 CSV file. + + Included in this function is the marking of a policy 'scorecard', with ticks, crosses, etc. + """ + locale = phrases['locale'] + # Set up PDF document template pages + pages = pdf_template_setup(config, 'template_web', font, language) + pages = format_pages(pages, phrases) + # initialise PDF + pdf = FPDF(orientation='portrait', format='A4', unit='mm') + # set up fonts + prepare_pdf_fonts(pdf, config, language) + pdf.set_author(phrases['metadata_author']) + pdf.set_title(f"{phrases['metadata_title1']} {phrases['metadata_title2']}") + pdf.set_auto_page_break(False) + if template == 'web': + pdf = pdf_for_web( + pdf, + pages, + config, + language, + locale, + phrases, + indicators, + city_policy, + ) + elif template == 'print': + pdf = pdf_for_print( + pdf, + pages, + config, + language, + locale, + phrases, + indicators, + city_policy, + ) + # Output report pdf + filename = f"{phrases['city_name']} - {phrases['title_series_line1'].replace(':','')} - GHSCIC 2022 - {phrases['vernacular']}.pdf" + if phrases['_export'] == 1: + capture_result = save_pdf_layout( + pdf, + folder=config.region['region_dir'], + template=template, + filename=filename, + ) + return capture_result + else: + return 'Skipped.' + + +def pdf_for_web( + pdf, pages, config, language, locale, phrases, indicators, city_policy, +): + """ + Generate a PDF based on a template for web distribution. + + This template includes reporting on both policy and spatial indicators. + """ + city = config.city + city_path = config.region['region_dir'] + figure_path = f'{city_path}/figures' + # Set up Cover page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['1']) + if os.path.exists( + f'{config.folder_path}/process/configuration/assets/{phrases["Image 1 file"]}', + ): + template[ + 'hero_image' + ] = f'{config.folder_path}/process/configuration/assets/{phrases["Image 1 file"]}' + template['hero_alt'] = '' + template['Image 1 credit'] = phrases['Image 1 credit'] + template.render() + # Set up next page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['2']) + template['citations'] = phrases['citations'] + template['study_executive_names'] = phrases['study_executive_names'] + template['local_collaborators'] = template['local_collaborators'].format( + title_city=phrases['title_city'], + ) + template['local_collaborators_names'] = phrases[ + 'local_collaborators_names' + ] + if phrases['translation_names'] is None: + template['translation'] = '' + template['translation_names'] = '' + template.render() + # Set up next page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['3']) + template[ + 'introduction' + ] = f"{phrases['series_intro']}\n\n{phrases['series_interpretation']}".format( + **phrases, + ) + ## Access profile plot + template['access_profile'] = f'{figure_path}/access_profile_{language}.jpg' + ## Walkability plot + template[ + 'all_cities_walkability' + ] = f'{figure_path}/all_cities_walkability_{language}.jpg' + template['walkability_above_median_pct'] = phrases[ + 'walkability_above_median_pct' + ].format( + _pct( + fnum( + indicators['report']['walkability'][ + 'walkability_above_median_pct' + ], + '0.0', + locale, + ), + locale, + ), + ) + ## Policy ratings + template[ + 'presence_rating' + ] = f'{figure_path}/policy_presence_rating_{language}.jpg' + template[ + 'quality_rating' + ] = f'{figure_path}/policy_checklist_rating_{language}.jpg' + 
template['city_header'] = phrases['city_name'] + ## City planning requirement presence (round 0.5 up to 1) + policy_indicators = {0: '✗', 0.5: '~', 1: '✓'} + for x in range(1, 7): + # check presence + template[f'policy_urban_text{x}_response'] = policy_indicators[ + city_policy['Presence'][x - 1] + ] + # format percentage units according to locale + for gdp in ['middle', 'upper']: + template[f'policy_urban_text{x}_{gdp}'] = _pct( + float(city_policy['Presence_gdp'].iloc[x - 1][gdp]), + locale, + length='short', + ) + ## Walkable neighbourhood policy checklist + for i, policy in enumerate(city_policy['Checklist'].index): + row = i + 1 + for j, item in enumerate([x for x in city_policy['Checklist'][i]]): + col = j + 1 + template[f"policy_{'Checklist'}_text{row}_response{col}"] = item + template.render() + # Set up next page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['4']) + ## Density plots + template[ + 'local_nh_population_density' + ] = f'{figure_path}/local_nh_population_density_{language}.jpg' + template[ + 'local_nh_intersection_density' + ] = f'{figure_path}/local_nh_intersection_density_{language}.jpg' + ## Density threshold captions + for scenario in indicators['report']['thresholds']: + template[scenario] = phrases[f'optimal_range - {scenario}'].format( + _pct( + fnum( + indicators['report']['thresholds'][scenario]['pct'], + '0.0', + locale, + ), + locale, + ), + fnum( + indicators['report']['thresholds'][scenario]['criteria'], + '#,000', + locale, + ), + phrases['density_units'], + ) + if os.path.exists( + f'{config.folder_path}/process/configuration/assets/{phrases["Image 2 file"]}', + ): + template[ + 'hero_image_2' + ] = f'{config.folder_path}/process/configuration/assets/{phrases["Image 2 file"]}' + template['hero_alt_2'] = '' + template['Image 2 credit'] = phrases['Image 2 credit'] + template.render() + # Set up next page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['5']) + template[ + 'pct_access_500m_pt.jpg' + ] = f'{figure_path}/pct_access_500m_pt_{language}.jpg' + template[ + 'pct_access_500m_public_open_space_large_score' + ] = f'{figure_path}/pct_access_500m_public_open_space_large_score_{language}.jpg' + template['city_text'] = phrases[f'{city} - Summary'] + ## Checklist ratings for PT and POS + for analysis in ['PT', 'POS']: + for i, policy in enumerate(city_policy[analysis].index): + row = i + 1 + for j, item in enumerate([x for x in city_policy[analysis][i]]): + col = j + 1 + template[f'policy_{analysis}_text{row}_response{col}'] = item + template.render() + # Set up last page + pdf.add_page() + template = FlexTemplate(pdf, elements=pages['6']) + template.render() + return pdf diff --git a/process/subprocesses/script_running_log.py b/process/subprocesses/script_running_log.py index 757c0d77..0c39a5c8 100644 --- a/process/subprocesses/script_running_log.py +++ b/process/subprocesses/script_running_log.py @@ -12,17 +12,27 @@ import psycopg2 # Import custom variables for National Liveability indicator process -from _project_setup import db, db_host, db_port, db_pwd, db_user +from _project_setup import ( + analysis_timezone, + db, + db_host, + db_port, + db_pwd, + db_user, +) + +os.environ['TZ'] = analysis_timezone +time.tzset() -# Define script logging to study region database function def script_running_log(script='', task='', start='', prefix=''): + """Define script logging to study region database function.""" # Initialise postgresql connection conn = psycopg2.connect( dbname=db, user=db_user, password=db_pwd, host=db_host, 
port=db_port, ) curs = conn.cursor() - date_time = time.strftime('%Y%m%d-%H%M%S') + date_time = time.strftime('%Y-%m-%d_%H%M') duration = (time.time() - start) / 60 log_table = """ @@ -43,7 +53,7 @@ def script_running_log(script='', task='', start='', prefix=''): curs.execute(log_table) conn.commit() print( - """Processing completed at {}\n- Task: {}\n- Duration: {:04.2f} minutes\n""".format( + """\nProcessing completed at {}\n- Task: {}\n- Duration: {:04.2f} minutes\n""".format( date_time, task, duration, ), ) diff --git a/process/subprocesses/setup_aggr.py b/process/subprocesses/setup_aggr.py index 0f2a6138..e5d18366 100644 --- a/process/subprocesses/setup_aggr.py +++ b/process/subprocesses/setup_aggr.py @@ -9,18 +9,18 @@ import pandas as pd -def calc_grid_pct_sp_indicators(region_dictionary, indicators): - """Caculate sample point weighted grid-level indicators within each city, and save to output geopackage. +def calc_grid_pct_sp_indicators(engine, region_config, indicators): + """Caculate sample point weighted grid-level indicators within each city. Parameters ---------- - region_dictionary: dict - gpkg: path of geopackage file with city resources for inputs and outputs + engine: sql connection + region_config: dict city_name: city full name - grid_summary_output: output name for CSV file and gpkg layer summarising grid results - e.g. {study_region}_grid_{resolution}m_yyyymmdd - city_summary_output: output name for CSV file and gpkg layer summarising city results - {study_region}_city_yyyymmdd + grid_summary_output: output name for CSV file and db layer summarising grid results + e.g. {study_region}_grid_{resolution}m + city_summary_output: output name for CSV file and db layer summarising city results + {study_region}_city indicators: dict output: dict sample_point_variables: list @@ -30,35 +30,39 @@ def calc_grid_pct_sp_indicators(region_dictionary, indicators): ------- String (indicating presumptive success) """ - gpkg = region_dictionary['gpkg'] - # read input geopackage with processed sample point and grid layer - gdf_samplepoint = gpd.read_file(gpkg, layer='samplePointsData') - gdf_samplepoint = gdf_samplepoint[ + # read sample point and grid layer + with engine.connect() as connection: + gdf_grid = gpd.read_postgis( + region_config['population_grid'], connection, index_col='grid_id', + ) + with engine.connect() as connection: + gdf_sample_points = gpd.read_postgis( + region_config['point_summary'], connection, index_col='point_id', + ) + gdf_sample_points = gdf_sample_points[ ['grid_id'] + indicators['output']['sample_point_variables'] ] - gdf_samplepoint.columns = ['grid_id'] + indicators['output'][ + gdf_sample_points.columns = ['grid_id'] + indicators['output'][ 'neighbourhood_variables' ] - gdf_grid = gpd.read_file(gpkg, layer=region_dictionary['population_grid']) - # join urban sample point count to gdf_grid - samplepoint_count = gdf_samplepoint['grid_id'].value_counts() - samplepoint_count.name = 'urban_sample_point_count' - gdf_grid = gdf_grid.join(samplepoint_count, how='inner', on='grid_id') + sample_points_count = gdf_sample_points['grid_id'].value_counts() + sample_points_count.name = 'urban_sample_point_count' + gdf_grid = gdf_grid.join(sample_points_count, how='inner', on='grid_id') # perform aggregation functions to calculate sample point weighted grid cell indicators # to retain indicators which may be all NaN (eg cities absent GTFS data), numeric_only=False - gdf_samplepoint = gdf_samplepoint.groupby('grid_id').mean( + gdf_sample_points = 
gdf_sample_points.groupby('grid_id').mean( numeric_only=False, ) - gdf_grid = gdf_grid.join(gdf_samplepoint, how='left', on='grid_id') + gdf_grid = gdf_grid.join(gdf_sample_points, how='left', on='grid_id') # scale percentages from proportions pct_fields = [x for x in gdf_grid if x.startswith('pct_access')] gdf_grid[pct_fields] = gdf_grid[pct_fields] * 100 - gdf_grid['study_region'] = region_dictionary['name'] + gdf_grid['study_region'] = region_config['name'] grid_fields = ( indicators['output']['basic_attributes'] @@ -66,19 +70,23 @@ def calc_grid_pct_sp_indicators(region_dictionary, indicators): ) grid_fields = [x for x in grid_fields if x in gdf_grid.columns] - # save the gdf_grid to geopackage - gdf_grid[grid_fields + ['geometry']].to_file( - gpkg, layer=region_dictionary['grid_summary'], driver='GPKG', - ) + # save the grid indicators + with engine.connect() as connection: + gdf_grid[grid_fields + ['geom']].set_geometry('geom').to_postgis( + region_config['grid_summary'], + connection, + index=True, + if_exists='replace', + ) gdf_grid[grid_fields].to_csv( - f"{region_dictionary['region_dir']}/{region_dictionary['grid_summary']}.csv", + f"{region_config['region_dir']}/{region_config['grid_summary']}.csv", index=False, ) return 'Exported gridded small area summary statistics' -def calc_cities_pop_pct_indicators(region_dictionary, indicators): - """Calculate population-weighted city-level indicators, and save to output geopackage. +def calc_cities_pop_pct_indicators(engine, region_config, indicators): + """Calculate population-weighted city-level indicators. These indicators include: 'pop_pct_access_500m_fresh_food_markets', @@ -92,34 +100,37 @@ def calc_cities_pop_pct_indicators(region_dictionary, indicators): Parameters ---------- - region_dictionary: dict - gpkg: path of geopackage file with city resources for inputs and outputs + engine: sql connection + region_config: dict city_name: city full name - grid_summary_output: output name for CSV file and gpkg layer summarising grid results - e.g. {study_region}_grid_{resolution}m_yyyymmdd - city_summary_output: output name for CSV file and gpkg layer summarising city results - {study_region}_city_yyyymmdd + grid_summary_output: output name for CSV file and db layer summarising grid results + e.g. 
{study_region}_grid_{resolution}m + city_summary_output: output name for CSV file and db layer summarising city results + {study_region}_city indicators: dict - output: dict - sample_point_variables: list - neighbourhood_variables: list - extra_unweighted_vars: list - an optional list of variables to also calculate mean (unweighted) for + output: dict + sample_point_variables: list + neighbourhood_variables: list + extra_unweighted_vars: list + an optional list of variables to also calculate mean (unweighted) for Returns ------- String (indicating presumptive success) """ - gpkg = region_dictionary['gpkg'] - gdf_grid = gpd.read_file(gpkg, layer=region_dictionary['grid_summary']) - gdf_study_region = gpd.read_file(gpkg, layer='urban_study_region') - urban_covariates = gpd.read_file(gpkg, layer='urban_covariates') - + with engine.connect() as connection: + gdf_grid = gpd.read_postgis( + region_config['grid_summary'], connection, index_col='grid_id', + ) + with engine.connect() as connection: + gdf_study_region = gpd.read_postgis('urban_study_region', connection) + with engine.connect() as connection: + urban_covariates = pd.read_sql_table('urban_covariates', connection) # calculate the sum of urban sample point counts for city urban_covariates['urban_sample_point_count'] = gdf_grid[ 'urban_sample_point_count' ].sum() - urban_covariates['geometry'] = gdf_study_region['geometry'] + urban_covariates['geom'] = gdf_study_region['geom'] urban_covariates.crs = gdf_study_region.crs # Map differences in grid names to city names @@ -151,14 +162,16 @@ def calc_cities_pop_pct_indicators(region_dictionary, indicators): ) # order geometry as final column urban_covariates = urban_covariates[ - [x for x in urban_covariates.columns if x != 'geometry'] + ['geometry'] + [x for x in urban_covariates.columns if x != 'geom'] + ['geom'] ] - urban_covariates.to_file( - gpkg, layer=region_dictionary['city_summary'], driver='GPKG', - ) + urban_covariates = urban_covariates.set_geometry('geom') + with engine.connect() as connection: + urban_covariates.to_postgis( + region_config['city_summary'], connection, if_exists='replace', + ) urban_covariates[ - [x for x in urban_covariates.columns if x != 'geometry'] + [x for x in urban_covariates.columns if x != 'geom'] ].to_csv( - f"{region_dictionary['region_dir']}/{region_dictionary['city_summary']}.csv", + f"{region_config['region_dir']}/{region_config['city_summary']}.csv", index=False, ) diff --git a/process/subprocesses/setup_sp.py b/process/subprocesses/setup_sp.py index fb7f642e..5708733f 100644 --- a/process/subprocesses/setup_sp.py +++ b/process/subprocesses/setup_sp.py @@ -110,6 +110,7 @@ def create_pdna_net(gdf_nodes, gdf_edges, predistance=500): def cal_dist_node_to_nearest_pois( gdf_poi, + geometry, distance, network, category_field=None, @@ -125,6 +126,8 @@ def cal_dist_node_to_nearest_pois( ---------- gdf_poi: GeoDataFrame GeoDataFrame of destination point-of-interest + geometry: str + geometry column name distance: int the maximum search distance network: pandana network @@ -145,8 +148,8 @@ def cal_dist_node_to_nearest_pois( ------- GeoDataFrame """ - gdf_poi['x'] = gdf_poi['geometry'].apply(lambda x: x.x) - gdf_poi['y'] = gdf_poi['geometry'].apply(lambda x: x.y) + gdf_poi['x'] = gdf_poi[geometry].apply(lambda x: x.x) + gdf_poi['y'] = gdf_poi[geometry].apply(lambda x: x.y) if category_field is not None and categories is not None: # Calculate distances iterating over categories appended_data = []
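(Editorial aside, not part of the patch.) With the new geometry argument, cal_dist_node_to_nearest_pois no longer assumes a column literally named 'geometry', which presumably lets the same code handle layers read from PostGIS, where this diff elsewhere uses 'geom'. A hedged sketch of a call under those assumptions; the destination layer and network names are illustrative, not taken from the diff:

    # Assumes `network` was built with create_pdna_net(gdf_nodes, gdf_edges) and that
    # `gdf_fresh_food_markets` is a hypothetical destination GeoDataFrame whose
    # geometry column is named 'geom'.
    distances = cal_dist_node_to_nearest_pois(
        gdf_poi=gdf_fresh_food_markets,
        geometry='geom',   # geometry column name, now passed explicitly
        distance=500,      # maximum search distance
        network=network,
    )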