diff --git a/process/compare.py b/process/compare.py index 17e9c772..a90ea932 100644 --- a/process/compare.py +++ b/process/compare.py @@ -49,7 +49,7 @@ def compare(r, comparison_codename): dfs[file] = pd.read_csv(files[file]) else: sys.exit( - f"""Compare a reference city to a comparison city, and save the comparison as a CSV file.\n\nThe summary results file ({files[file]}) could not be located.\n\nPlease try again by entering codenames from the list of configured cities {region_names} that have been fully analysed with resources generated:\npython 4_compare.py \n\nAlternatively, enter the shortcut command:\ncompare """, + f"""Compare a reference city to a comparison city, and save the comparison as a CSV file.\n\nThe summary results file ({files[file]}) could not be located.\n\nPlease try again by entering codenames from the list of configured cities {get_region_names()} that have been fully analysed with resources generated:\npython 4_compare.py \n\nAlternatively, enter the shortcut command:\ncompare """, ) # print(pd.concat(dfs).transpose()) comparison = ( diff --git a/process/subprocesses/_04_create_population_grid.py b/process/subprocesses/_04_create_population_grid.py index 4da8fa10..80266a2e 100644 --- a/process/subprocesses/_04_create_population_grid.py +++ b/process/subprocesses/_04_create_population_grid.py @@ -5,209 +5,50 @@ population raster dataset tiles. 
""" -import os -import subprocess as sp import sys import time # Set up project and region parameters for GHSCIC analyses import ghsci import pandas as pd -from osgeo import gdal from script_running_log import script_running_log -from sqlalchemy import create_engine, inspect, text +from sqlalchemy import text -# disable noisy GDAL logging -# gdal.SetConfigOption('CPL_LOG', 'NUL') # Windows -gdal.SetConfigOption('CPL_LOG', '/dev/null') # Linux/MacOS - -def reproject_raster(inpath, outpath, new_crs): - import rasterio - from rasterio.warp import ( - Resampling, - calculate_default_transform, - reproject, - ) - - dst_crs = new_crs # CRS for web meractor - with rasterio.open(inpath) as src: - transform, width, height = calculate_default_transform( - src.crs, dst_crs, src.width, src.height, *src.bounds, +def population_to_db(r): + if r.config['population']['data_type'].startswith('raster'): + # raster source + r.raster_to_db( + raster='population', + config=r.config['population'], + field='pop_est', + reference_grid=True, ) - kwargs = src.meta.copy() - kwargs.update( - { - 'crs': dst_crs, - 'transform': transform, - 'width': width, - 'height': height, - }, - ) - with rasterio.open(outpath, 'w', **kwargs) as dst: - for i in range(1, src.count + 1): - reproject( - source=rasterio.band(src, i), - destination=rasterio.band(dst, i), - src_transform=src.transform, - src_crs=src.crs, - dst_transform=transform, - dst_crs=dst_crs, - resampling=Resampling.nearest, - ) - - -def extract_population_from_raster(r): - print('Extracting population raster data...') - db = r.config['db'] - db_host = r.config['db_host'] - db_pwd = r.config['db_pwd'] - population_stub = ( - f'{r.config["region_dir"]}/{r.config["population_grid"]}_{r.codename}' - ) - # construct virtual raster table - vrt = f'{r.config["population"]["data_dir"]}/{r.config["population_grid"]}_{r.config["population"]["crs_srid"]}.vrt' - population_raster_clipped = ( - 
f'{population_stub}_{r.config["population"]["crs_srid"]}.tif' - ) - population_raster_projected = ( - f'{population_stub}_{r.config["crs"]["srid"]}.tif' - ) - print('Raster population dataset...', end='', flush=True) - if not os.path.isfile(vrt): - tif_folder = f'{r.config["population"]["data_dir"]}' - tif_files = [ - os.path.join(tif_folder, file) - for file in os.listdir(tif_folder) - if os.path.splitext(file)[-1] == '.tif' - ] - gdal.BuildVRT(vrt, tif_files) - print(f' has now been indexed ({vrt}).') else: - print(f' has already been indexed ({vrt}).') - print('\nPopulation data clipped to region...', end='', flush=True) - if not os.path.isfile(population_raster_clipped): - # extract study region boundary in projection of tiles - clipping_query = ( - f'SELECT geom FROM {r.config["buffered_urban_study_region"]}' + # vector source + r.ogr_to_db( + source=r.config['population']['data_dir'], + layer=r.config['population_grid'], + promote_to_multi=True, + source_crs=f"{r.config['population']['crs_srid']}", ) - clipping = r.get_gdf(text(clipping_query), geom_col='geom').to_crs( - r.config['population']['crs_srid'], - ) - # get clipping boundary values in required order for gdal translate - bbox = list( - clipping.bounds[['minx', 'maxy', 'maxx', 'miny']].values[0], - ) - # bbox = list(clipping.bounds.values[0]) - gdal.Translate(population_raster_clipped, vrt, projWin=bbox) - print(f' has now been created ({population_raster_clipped}).') - else: - print(f' has already been created ({population_raster_clipped}).') - print('\nPopulation data projected for region...', end='', flush=True) - if not os.path.isfile(population_raster_projected): - # reproject and save the re-projected clipped raster - # (see config file for reprojection function) - reproject_raster( - inpath=population_raster_clipped, - outpath=population_raster_projected, - new_crs=r.config['crs']['srid'], - ) - print(f' has now been created ({population_raster_projected}).') - else: - print(f' has already 
been created ({population_raster_projected}).') - if r.config['population_grid'] not in r.tables: - print( - f'\nImport population grid {r.config["population_grid"]} to database... ', - end='', - flush=True, - ) - # import raster to postgis and vectorise, as per http://www.brianmcgill.org/postgis_zonal.pdf - command = ( - f'raster2pgsql -d -s {r.config["crs"]["srid"]} -I -Y ' - f"-N {r.config['population']['raster_nodata']} " - f'-t 1x1 {population_raster_projected} {r.config["population_grid"]} ' - f'| PGPASSWORD={db_pwd} psql -U postgres -h {db_host} -d {db} ' - '>> /dev/null' - ) - sp.call(command, shell=True) - print('Done.') - else: - print(f'{r.config["population_grid"]} has been imported to database.') - - -def raster_sql_processing(r): - queries = [ - f"""ALTER TABLE {r.config["population_grid"]} DROP COLUMN rid;""", - f"""DELETE FROM {r.config["population_grid"]} WHERE (ST_SummaryStats(rast)).sum IS NULL;""", - f"""ALTER TABLE {r.config["population_grid"]} ADD grid_id bigserial;""", - f"""ALTER TABLE {r.config["population_grid"]} ADD COLUMN IF NOT EXISTS pop_est int;""", - f"""ALTER TABLE {r.config["population_grid"]} ADD COLUMN IF NOT EXISTS geom geometry;""", - f"""UPDATE {r.config["population_grid"]} SET geom = ST_ConvexHull(rast);""", - f"""CREATE INDEX {r.config["population_grid"]}_ix ON {r.config["population_grid"]} (grid_id);""", - f"""CREATE INDEX {r.config["population_grid"]}_gix ON {r.config["population_grid"]} USING GIST(geom);""", - f"""UPDATE {r.config["population_grid"]} SET pop_est = (ST_SummaryStats(rast)).sum;""", - f"""ALTER TABLE {r.config["population_grid"]} DROP COLUMN rast;""", - ] - for sql in queries: - with r.engine.begin() as connection: - connection.execute(text(sql)) - - -def extract_population_from_vector(r): - print('Extracting population vector data...') - db = r.config['db'] - db_host = r.config['db_host'] - db_port = r.config['db_port'] - db_user = r.config['db_user'] - db_pwd = r.config['db_pwd'] - population_data = 
r.config['population']['data_dir'] - if '.gpkg:' in population_data: - gpkg = population_data.split(':') - population_data = gpkg[0] - query = gpkg[1] - else: - query = '' - command = ( - ' ogr2ogr -overwrite -progress -f "PostgreSQL" ' - f' PG:"host={db_host} port={db_port} dbname={db}' - f' user={db_user} password={db_pwd}" ' - f' "{population_data}" ' - f' -lco geometry_name="geom" -lco precision=NO ' - f' -t_srs {r.config["crs_srid"]} -nln "{r.config["population_grid"]}" ' - f' -nlt PROMOTE_TO_MULTI' - f' {query}' - ) - print(command) - failure = sp.call(command, shell=True) - if failure == 1: - sys.exit( - f"Error reading in population data '{population_data}' (check format)", - ) - # except Exception as e: - # raise Exception(f'Error reading in boundary data (check format): {e}') + queries = [ + f"""ALTER TABLE {r.config["population_grid"]} ADD grid_id bigserial;""", + f"""ALTER TABLE {r.config["population_grid"]} RENAME {r.config["population"]["vector_population_data_field"]} TO pop_est;""", + f"""CREATE INDEX {r.config["population_grid"]}_ix ON {r.config["population_grid"]} (grid_id);""", + ] + for sql in queries: + with r.engine.begin() as connection: + connection.execute(text(sql)) print('Done.') -def vector_sql_processing(r): - queries = [ - f"""ALTER TABLE {r.config["population_grid"]} ADD grid_id bigserial;""", - f"""ALTER TABLE {r.config["population_grid"]} RENAME {r.config["population"]["vector_population_data_field"]} TO pop_est;""", - ] - for sql in queries: - with r.engine.begin() as connection: - connection.execute(text(sql)) - - def derive_population_grid_variables(r): print( 'Derive population grid variables and summaries... 
', end='', flush=True, ) - if r.config['population']['data_type'].startswith('vector'): - vector_sql_processing(r) - else: - raster_sql_processing(r) queries = [ f"""ALTER TABLE {r.config["population_grid"]} ADD COLUMN IF NOT EXISTS area_sqkm float;""", f"""ALTER TABLE {r.config["population_grid"]} ADD COLUMN IF NOT EXISTS pop_per_sqkm float;""", @@ -292,11 +133,9 @@ def create_population_grid(codename): if r.config['population_grid'] in tables: print('Population grid already exists in database.') else: - # population raster set up - if r.config['population']['data_type'].startswith('vector'): - extract_population_from_vector(r) - else: - extract_population_from_raster(r) + # import data + population_to_db(r) + # derive variables derive_population_grid_variables(r) pop = r.get_df(r.config['population_grid']) pd.options.display.float_format = ( diff --git a/process/subprocesses/_08_destination_summary.py b/process/subprocesses/_08_destination_summary.py index 00f4e642..684fab80 100644 --- a/process/subprocesses/_08_destination_summary.py +++ b/process/subprocesses/_08_destination_summary.py @@ -42,7 +42,7 @@ def destination_summary(codename): a.area_sqkm, a.pop_per_sqkm, t.count/a.area_sqkm AS dest_per_sqkm, - t.count/a.area_sqkm/(pop_est/10000) AS dest_per_sqkm_per_10kpop + t.count/a.area_sqkm/(pop_est/10000.0) AS dest_per_sqkm_per_10kpop FROM urban_study_region a, (SELECT d.dest_name_full, COUNT(d.*) count diff --git a/process/subprocesses/_utils.py b/process/subprocesses/_utils.py index 2deac5bb..e426dee5 100644 --- a/process/subprocesses/_utils.py +++ b/process/subprocesses/_utils.py @@ -22,7 +22,6 @@ from mpl_toolkits.axes_grid1 import make_axes_locatable from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar from mpl_toolkits.axes_grid1.inset_locator import inset_axes -from subprocesses.batlow import batlow_map as cmap # 'pretty' text wrapping as per 
https://stackoverflow.com/questions/37572837/how-can-i-make-python-3s-print-fit-the-size-of-the-command-prompt @@ -266,6 +265,8 @@ def check_and_update_config_reporting_parameters(config): def generate_report_for_language( r, language, indicators, policies, ): + from subprocesses.batlow import batlow_map as cmap + """Generate report for a processed city in a given language.""" font = get_and_setup_font(language, r.config) # set up policies @@ -1570,6 +1571,7 @@ def study_region_map( import cartopy.io.img_tiles as cimgt import cartopy.io.ogc_clients as ogcc from shapely.geometry import box + from subprocesses.batlow import batlow_map as cmap file_name = re.sub(r'\W+', '_', file_name) filepath = f'{region_config["region_dir"]}/figures/{file_name}.png' @@ -1788,3 +1790,38 @@ def buffered_box(total_bounds, distance): buffer_distance = [x * distance for x in mod] new_bounds = [total_bounds[x] + buffer_distance[x] for x in range(0, 4)] return new_bounds + + +def reproject_raster(inpath, outpath, new_crs): + import rasterio + from rasterio.warp import ( + Resampling, + calculate_default_transform, + reproject, + ) + + dst_crs = new_crs # CRS for web meractor + with rasterio.open(inpath) as src: + transform, width, height = calculate_default_transform( + src.crs, dst_crs, src.width, src.height, *src.bounds, + ) + kwargs = src.meta.copy() + kwargs.update( + { + 'crs': dst_crs, + 'transform': transform, + 'width': width, + 'height': height, + }, + ) + with rasterio.open(outpath, 'w', **kwargs) as dst: + for i in range(1, src.count + 1): + reproject( + source=rasterio.band(src, i), + destination=rasterio.band(dst, i), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=dst_crs, + resampling=Resampling.nearest, + ) diff --git a/process/subprocesses/ghsci.py b/process/subprocesses/ghsci.py index 74bc42de..8e042166 100644 --- a/process/subprocesses/ghsci.py +++ b/process/subprocesses/ghsci.py @@ -129,13 +129,23 @@ def compare(self, comparison): 
        comparison = compare_resources(self, comparison)
        return comparison
 
-    def drop(self):
+    def drop(self, table=''):
         """Attempt to drop database results for this study region."""
-        from _drop_study_region_database import (
-            drop_study_region_database as drop_resources,
-        )
+        if table == '':
+            from _drop_study_region_database import (
+                drop_study_region_database as drop_resources,
+            )
 
-        drop_resources(self)
+            drop_resources(self)
+        else:
+            with self.engine.begin() as connection:
+                try:
+                    print(f'Dropping table {table}...')
+                    connection.execute(
+                        text(f"""DROP TABLE IF EXISTS {table};"""),
+                    )
+                except Exception as e:
+                    print(f'Error: {e}')
 
     def _create_database(self):
         """Create database for this study region."""
@@ -387,7 +397,13 @@ def ogr_to_db(
         """Read spatial data with ogr2ogr and save to Postgis database."""
         import subprocess as sp
 
-        name = self.config['name']
+        if source.count(':') == 1:
+            # appears to be using optional query syntax as could be used for a geopackage
+            parts = source.split(':')
+            source = parts[0]
+            query = parts[1]
+            del parts
+
         crs_srid = self.config['crs_srid']
         db = self.config['db']
         db_host = self.config['db_host']
@@ -414,6 +430,120 @@ def ogr_to_db(
         else:
             return failure
 
+    def raster_to_db(
+        self,
+        raster: str,
+        config: dict,
+        field: str,
+        to_vector: bool = True,
+        reference_grid=False,
+    ):
+        """Read raster data and save to Postgis database, optionally adding and indexing a unique grid_id variable for use as a reference grid for analysis."""
+        import subprocess as sp
+
+        from _utils import reproject_raster
+        from osgeo import gdal
+
+        # disable noisy GDAL logging
+        # gdal.SetConfigOption('CPL_LOG', 'NUL')  # Windows
+        gdal.SetConfigOption('CPL_LOG', '/dev/null')  # Linux/MacOS
+        # Extract data from raster tiles and import to database.
+        print('Extracting raster data...')
+        raster_grid = self.config['population_grid']
+        raster_stub = (
+            f'{self.config["region_dir"]}/{raster_grid}_{self.codename}'
+        )
+        # construct virtual raster table
+        vrt = f'{config["data_dir"]}/{raster_grid}_{config["crs_srid"]}.vrt'
+        raster_clipped = f'{raster_stub}_{config["crs_srid"]}.tif'
+        raster_projected = f'{raster_stub}_{self.config["crs"]["srid"]}.tif'
+        print(f'{raster} dataset...', end='', flush=True)
+        if not os.path.isfile(vrt):
+            tif_folder = f'{config["data_dir"]}'
+            tif_files = [
+                os.path.join(tif_folder, file)
+                for file in os.listdir(tif_folder)
+                if os.path.splitext(file)[-1] == '.tif'
+            ]
+            gdal.BuildVRT(vrt, tif_files)
+            print(f'{raster} has now been indexed ({vrt}).')
+        else:
+            print(f'{raster} has already been indexed ({vrt}).')
+        print(f'\n{raster} data clipped to region...', end='', flush=True)
+        if not os.path.isfile(raster_clipped):
+            # extract study region boundary in projection of tiles
+            clipping_query = f'SELECT geom FROM {self.config["buffered_urban_study_region"]}'
+            clipping = self.get_gdf(
+                text(clipping_query), geom_col='geom',
+            ).to_crs(config['crs_srid'])
+            # get clipping boundary values in required order for gdal translate
+            bbox = list(
+                clipping.bounds[['minx', 'maxy', 'maxx', 'miny']].values[0],
+            )
+            gdal.Translate(raster_clipped, vrt, projWin=bbox)
+            print(f'{raster} has now been created ({raster_clipped}).')
+        else:
+            print(f'{raster} has already been created ({raster_clipped}).')
+        print(f'\n{raster} projected for region...', end='', flush=True)
+        if not os.path.isfile(raster_projected):
+            # reproject and save the re-projected clipped raster
+            reproject_raster(
+                inpath=raster_clipped,
+                outpath=raster_projected,
+                new_crs=self.config['crs']['srid'],
+            )
+            print(f' has now been created ({raster_projected}).')
+        else:
+            print(f' has already been created ({raster_projected}).')
+        if raster_grid not in self.tables:
+            print(
+                f'\nImport grid {raster_grid} to database... ',
+                end='',
+                flush=True,
+            )
+            # import raster to postgis and vectorise, as per http://www.brianmcgill.org/postgis_zonal.pdf
+            if to_vector:
+                command = (
+                    f'raster2pgsql -d -s {self.config["crs"]["srid"]} -I -Y '
+                    f"-N {config['raster_nodata']} "
+                    f'-t 1x1 {raster_projected} {raster_grid} '
+                    f'| PGPASSWORD={self.config["db_pwd"]} psql -U postgres -h {self.config["db_host"]} -d {self.config["db"]} '
+                    '>> /dev/null'
+                )
+                sp.call(command, shell=True)
+                # remove empty cells
+                with self.engine.begin() as connection:
+                    connection.execute(
+                        text(
+                            f"""DELETE FROM {raster_grid} WHERE (ST_SummaryStats(rast)).sum IS NULL;""",
+                        ),
+                    )
+                # if reference grid add and index grid id
+                if reference_grid:
+                    queries = [
+                        f"""ALTER TABLE {raster_grid} DROP COLUMN rid;""",
+                        f"""ALTER TABLE {raster_grid} ADD grid_id bigserial;""",
+                        f"""CREATE INDEX {raster_grid}_ix ON {raster_grid} (grid_id);""",
+                    ]
+                    for sql in queries:
+                        with self.engine.begin() as connection:
+                            connection.execute(text(sql))
+                # add geometry column and calculate statistic
+                queries = [
+                    f"""ALTER TABLE {raster_grid} ADD COLUMN IF NOT EXISTS geom geometry;""",
+                    f"""UPDATE {raster_grid} SET geom = ST_ConvexHull(rast);""",
+                    f"""CREATE INDEX {raster_grid}_gix ON {raster_grid} USING GIST(geom);""",
+                    f"""ALTER TABLE {raster_grid} ADD COLUMN IF NOT EXISTS {field} int;""",
+                    f"""UPDATE {raster_grid} SET {field} = (ST_SummaryStats(rast)).sum;""",
+                    f"""ALTER TABLE {raster_grid} DROP COLUMN rast;""",
+                ]
+                for sql in queries:
+                    with self.engine.begin() as connection:
+                        connection.execute(text(sql))
+            print('Done.')
+        else:
+            print(f'{raster_grid} has been imported to database.')
+
     def choropleth(
         self,
         field: str,
@@ -423,6 +553,7 @@ def choropleth(
         save=True,
         attribution: str = 'Global Healthy and Sustainable City Indicators Collaboration',
     ):
+        """Plot a choropleth map of a specified field in a specified layer, with a custom title and attribution, with optional saving to an html file."""
         from _utils import plot_choropleth_map
 
         tables = self.get_tables()