Skip to content

Commit

Permalink
Merge pull request #215 from global-healthy-liveable-cities/enhancements
Browse files Browse the repository at this point in the history
Address an edge case issue where sample points aren't associated with grid ids, causing a type error and failure to run
  • Loading branch information
carlhiggs authored Mar 16, 2023
2 parents 7c7b974 + 0bf3d12 commit 04fc389
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 43 deletions.
48 changes: 11 additions & 37 deletions process/subprocesses/_12_neighbourhood_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
cal_dist_node_to_nearest_pois,
create_full_nodes,
create_pdna_net,
filter_ids,
spatial_join_index_to_gdf,
)
from tqdm import tqdm
Expand Down Expand Up @@ -85,9 +86,7 @@ def main():
print(' - Set up simple nodes')
gdf_nodes = ox.graph_to_gdfs(G_proj, nodes=True, edges=False)
# associate nodes with id
gdf_nodes = spatial_join_index_to_gdf(
gdf_nodes, grid, right_index_name='grid_id', join_type='within',
)
gdf_nodes = spatial_join_index_to_gdf(gdf_nodes, grid, dropna=False)
# keep only the unique node id column
gdf_nodes = gdf_nodes[['grid_id', 'geometry']]
# drop any nodes which are na
Expand Down Expand Up @@ -258,42 +257,17 @@ def main():
samplePointsData = gpd.read_file(gpkg, layer='urban_sample_points')
# create 'grid_id' for sample points, if it does not already exist
if 'grid_id' not in samplePointsData.columns:
samplePointsData = spatial_join_index_to_gdf(
samplePointsData,
grid,
right_index_name='grid_id',
join_type='within',
)
print(
'Restrict sample points to those not located in grids with a population below '
samplePointsData = spatial_join_index_to_gdf(samplePointsData, grid)
samplePointsData = filter_ids(
df=samplePointsData,
query=f"""grid_id not in {list(grid.query(f'pop_est < {population["pop_min_threshold"]}').index.values)}""",
message='Restrict sample points to those not located in grids with a population below '
f"the minimum threshold value ({population['pop_min_threshold']})...",
),
below_minimum_pop_ids = list(
grid.query(
f'pop_est < {population["pop_min_threshold"]}',
).index.values,
)
sample_point_length_pre_discard = len(samplePointsData)
samplePointsData = samplePointsData[
~samplePointsData.grid_id.isin(below_minimum_pop_ids)
]
sample_point_length_post_discard = len(samplePointsData)
print(
f' {sample_point_length_pre_discard - sample_point_length_post_discard} sample points discarded, '
f'leaving {sample_point_length_post_discard} remaining.',
)
print(
'Restrict sample points to those with two associated sample nodes...',
),
sample_point_length_pre_discard = len(samplePointsData)
samplePointsData = samplePointsData.query(
f'n1 in {list(gdf_nodes_simple.index.values)} '
f'and n2 in {list(gdf_nodes_simple.index.values)}',
)
sample_point_length_post_discard = len(samplePointsData)
print(
f' {sample_point_length_pre_discard - sample_point_length_post_discard} sample points discarded, '
f'leaving {sample_point_length_post_discard} remaining.',
samplePointsData = filter_ids(
df=samplePointsData,
query=f"""n1 in {list(gdf_nodes_simple.index.values)} and n2 in {list(gdf_nodes_simple.index.values)}""",
message='Restrict sample points to those with two associated sample nodes...',
)
samplePointsData.set_index('point_id', inplace=True)
distance_names = list(gdf_nodes_poi_dist.columns)
Expand Down
40 changes: 34 additions & 6 deletions process/subprocesses/setup_sp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,57 @@


def spatial_join_index_to_gdf(
    gdf, join_gdf, join_type='within', dropna=True,
):
    """Append to a geodataframe the named index of another using spatial join.

    The result has the columns of ``gdf`` plus one extra column, named after
    the index of ``join_gdf``, holding the index value of the matched
    ``join_gdf`` feature for each row.

    Parameters
    ----------
    gdf: GeoDataFrame
        Features to annotate (left side of the join).
    join_gdf: GeoDataFrame
        Features whose index is appended; the new column takes its name
        from ``join_gdf.index.name``.
    join_type: str (default 'within')
        Spatial predicate passed to ``gpd.sjoin``.
    dropna: bool (default True)
        If True, rows without a match are removed and the new column is
        cast to the dtype of ``join_gdf``'s index.  If False, unmatched rows
        are kept with a missing value and no cast is attempted.

    Returns
    -------
    GeoDataFrame
    """
    original_columns = list(gdf.columns)
    index_name = join_gdf.index.name
    joined = gpd.sjoin(gdf, join_gdf, how='left', predicate=join_type)
    # Older geopandas always labels the joined index 'index_right'; newer
    # releases keep a named right-hand index under its own name.  Accept
    # either so the selection below does not raise KeyError.
    if 'index_right' in joined.columns:
        joined_index_column = 'index_right'
    else:
        joined_index_column = index_name
    joined = joined[original_columns + [joined_index_column]]
    joined.columns = original_columns + [index_name]
    if dropna:
        # Copy the filtered frame so the assignment below acts on an
        # independent object rather than a slice of the join result.
        joined = joined[joined[index_name].notna()].copy()
        # The left join introduces missing values for unmatched rows, which
        # can change the column's dtype; restoring the index dtype is only
        # safe once those rows are gone, hence the cast lives inside this
        # branch.
        joined[index_name] = joined[index_name].astype(join_gdf.index.dtype)
    return joined


def filter_ids(df, query, message):
    """Pandas query designed to filter and report feedback on counts before and after query.

    Prints ``message``, applies ``query`` to ``df``, then prints how many
    rows were discarded and how many remain.

    Parameters
    ----------
    df: DataFrame
        Table to filter; it is not modified in place.
    query: str
        Pandas query string
    message: str
        An informative message to print describing query in plain language

    Returns
    -------
    DataFrame
        The rows of ``df`` that satisfy ``query``.
    """
    print(message)
    pre_discard = len(df)
    df = df.query(query)
    post_discard = len(df)
    print(
        f' {pre_discard - post_discard} sample points discarded, '
        f'leaving {post_discard} remaining.',
    )
    return df


def create_pdna_net(gdf_nodes, gdf_edges, predistance=500):
"""Create pandana network to prepare for calculating the accessibility to destinations The network is comprised of a set of nodes and edges.
Expand Down

0 comments on commit 04fc389

Please sign in to comment.