diff --git a/process/subprocesses/_12_neighbourhood_analysis.py b/process/subprocesses/_12_neighbourhood_analysis.py index b7ee83fc..6165962b 100644 --- a/process/subprocesses/_12_neighbourhood_analysis.py +++ b/process/subprocesses/_12_neighbourhood_analysis.py @@ -33,6 +33,7 @@ cal_dist_node_to_nearest_pois, create_full_nodes, create_pdna_net, + filter_ids, spatial_join_index_to_gdf, ) from tqdm import tqdm @@ -85,9 +86,7 @@ def main(): print(' - Set up simple nodes') gdf_nodes = ox.graph_to_gdfs(G_proj, nodes=True, edges=False) # associate nodes with id - gdf_nodes = spatial_join_index_to_gdf( - gdf_nodes, grid, right_index_name='grid_id', join_type='within', - ) + gdf_nodes = spatial_join_index_to_gdf(gdf_nodes, grid, dropna=False) # keep only the unique node id column gdf_nodes = gdf_nodes[['grid_id', 'geometry']] # drop any nodes which are na @@ -258,42 +257,17 @@ def main(): samplePointsData = gpd.read_file(gpkg, layer='urban_sample_points') # create 'grid_id' for sample point, if it not exists if 'grid_id' not in samplePointsData.columns: - samplePointsData = spatial_join_index_to_gdf( - samplePointsData, - grid, - right_index_name='grid_id', - join_type='within', - ) - print( - 'Restrict sample points to those not located in grids with a population below ' + samplePointsData = spatial_join_index_to_gdf(samplePointsData, grid) + samplePointsData = filter_ids( + df=samplePointsData, + query=f"""grid_id not in {list(grid.query(f'pop_est < {population["pop_min_threshold"]}').index.values)}""", + message='Restrict sample points to those not located in grids with a population below ' f"the minimum threshold value ({population['pop_min_threshold']})...", - ), - below_minimum_pop_ids = list( - grid.query( - f'pop_est < {population["pop_min_threshold"]}', - ).index.values, - ) - sample_point_length_pre_discard = len(samplePointsData) - samplePointsData = samplePointsData[ - ~samplePointsData.grid_id.isin(below_minimum_pop_ids) - ] - sample_point_length_post_discard = len(samplePointsData) - print( - f' {sample_point_length_pre_discard - sample_point_length_post_discard} sample points discarded, ' - f'leaving {sample_point_length_post_discard} remaining.', ) - print( - 'Restrict sample points to those with two associated sample nodes...', - ), - sample_point_length_pre_discard = len(samplePointsData) - samplePointsData = samplePointsData.query( - f'n1 in {list(gdf_nodes_simple.index.values)} ' - f'and n2 in {list(gdf_nodes_simple.index.values)}', - ) - sample_point_length_post_discard = len(samplePointsData) - print( - f' {sample_point_length_pre_discard - sample_point_length_post_discard} sample points discarded, ' - f'leaving {sample_point_length_post_discard} remaining.', + samplePointsData = filter_ids( + df=samplePointsData, + query=f"""n1 in {list(gdf_nodes_simple.index.values)} and n2 in {list(gdf_nodes_simple.index.values)}""", + message='Restrict sample points to those with two associated sample nodes...', ) samplePointsData.set_index('point_id', inplace=True) distance_names = list(gdf_nodes_poi_dist.columns) diff --git a/process/subprocesses/setup_sp.py b/process/subprocesses/setup_sp.py index b8865ac3..fb7f642e 100644 --- a/process/subprocesses/setup_sp.py +++ b/process/subprocesses/setup_sp.py @@ -17,7 +17,7 @@ def spatial_join_index_to_gdf( - gdf, join_gdf, right_index_name, join_type='within', + gdf, join_gdf, join_type='within', dropna=True, ): """Append to a geodataframe the named index of another using spatial join. @@ -25,8 +25,8 @@ def spatial_join_index_to_gdf( ---------- gdf: GeoDataFrame join_gdf: GeoDataFrame - right_index_name: str (default: None) - join_tyoe: str (default 'within') + join_type: str (default 'within') + dropna: True Returns ------- @@ -34,12 +34,40 @@ def spatial_join_index_to_gdf( """ gdf_columns = list(gdf.columns) gdf = gpd.sjoin(gdf, join_gdf, how='left', predicate=join_type) - if right_index_name is not None: - gdf = gdf[gdf_columns + ['index_right']] - gdf.columns = gdf_columns + [right_index_name] + gdf = gdf[gdf_columns + ['index_right']] + gdf.columns = gdf_columns + [join_gdf.index.name] + if dropna: + gdf = gdf[~gdf[join_gdf.index.name].isna()] + gdf[join_gdf.index.name] = gdf[join_gdf.index.name].astype( + join_gdf.index.dtype, + ) return gdf +def filter_ids(df, query, message): + """Pandas query designed to filter and report feedback on counts before and after query. + + Parameters + ---------- + df: DataFrame + query: str Pandas query string + message: str An informative message to print describing query in plain language + + Returns + ------- + DataFrame + """ + print(message) + pre_discard = len(df) + df = df.query(query) + post_discard = len(df) + print( + f' {pre_discard - post_discard} sample points discarded, ' + f'leaving {post_discard} remaining.', + ) + return df + + def create_pdna_net(gdf_nodes, gdf_edges, predistance=500): """Create pandana network to prepare for calculating the accessibility to destinations The network is comprised of a set of nodes and edges.