Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Address an edge case issue where sample points aren't associated with grid ids, causing a type error and failure to run #215

Merged
merged 1 commit into from
Mar 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 11 additions & 37 deletions process/subprocesses/_12_neighbourhood_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
cal_dist_node_to_nearest_pois,
create_full_nodes,
create_pdna_net,
filter_ids,
spatial_join_index_to_gdf,
)
from tqdm import tqdm
Expand Down Expand Up @@ -85,9 +86,7 @@ def main():
print(' - Set up simple nodes')
gdf_nodes = ox.graph_to_gdfs(G_proj, nodes=True, edges=False)
# associate nodes with id
gdf_nodes = spatial_join_index_to_gdf(
gdf_nodes, grid, right_index_name='grid_id', join_type='within',
)
gdf_nodes = spatial_join_index_to_gdf(gdf_nodes, grid, dropna=False)
# keep only the unique node id column
gdf_nodes = gdf_nodes[['grid_id', 'geometry']]
# drop any nodes which are na
Expand Down Expand Up @@ -258,42 +257,17 @@ def main():
samplePointsData = gpd.read_file(gpkg, layer='urban_sample_points')
# create 'grid_id' for sample points, if it does not exist
if 'grid_id' not in samplePointsData.columns:
samplePointsData = spatial_join_index_to_gdf(
samplePointsData,
grid,
right_index_name='grid_id',
join_type='within',
)
print(
'Restrict sample points to those not located in grids with a population below '
samplePointsData = spatial_join_index_to_gdf(samplePointsData, grid)
samplePointsData = filter_ids(
df=samplePointsData,
query=f"""grid_id not in {list(grid.query(f'pop_est < {population["pop_min_threshold"]}').index.values)}""",
message='Restrict sample points to those not located in grids with a population below '
f"the minimum threshold value ({population['pop_min_threshold']})...",
),
below_minimum_pop_ids = list(
grid.query(
f'pop_est < {population["pop_min_threshold"]}',
).index.values,
)
sample_point_length_pre_discard = len(samplePointsData)
samplePointsData = samplePointsData[
~samplePointsData.grid_id.isin(below_minimum_pop_ids)
]
sample_point_length_post_discard = len(samplePointsData)
print(
f' {sample_point_length_pre_discard - sample_point_length_post_discard} sample points discarded, '
f'leaving {sample_point_length_post_discard} remaining.',
)
print(
'Restrict sample points to those with two associated sample nodes...',
),
sample_point_length_pre_discard = len(samplePointsData)
samplePointsData = samplePointsData.query(
f'n1 in {list(gdf_nodes_simple.index.values)} '
f'and n2 in {list(gdf_nodes_simple.index.values)}',
)
sample_point_length_post_discard = len(samplePointsData)
print(
f' {sample_point_length_pre_discard - sample_point_length_post_discard} sample points discarded, '
f'leaving {sample_point_length_post_discard} remaining.',
samplePointsData = filter_ids(
df=samplePointsData,
query=f"""n1 in {list(gdf_nodes_simple.index.values)} and n2 in {list(gdf_nodes_simple.index.values)}""",
message='Restrict sample points to those with two associated sample nodes...',
)
samplePointsData.set_index('point_id', inplace=True)
distance_names = list(gdf_nodes_poi_dist.columns)
Expand Down
40 changes: 34 additions & 6 deletions process/subprocesses/setup_sp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,57 @@


def spatial_join_index_to_gdf(
gdf, join_gdf, right_index_name, join_type='within',
gdf, join_gdf, join_type='within', dropna=True,
):
"""Append to a geodataframe the named index of another using spatial join.

Parameters
----------
gdf: GeoDataFrame
join_gdf: GeoDataFrame
right_index_name: str (default: None)
join_tyoe: str (default 'within')
join_type: str (default 'within')
dropna: bool (default: True)

Returns
-------
GeoDataFrame
"""
gdf_columns = list(gdf.columns)
gdf = gpd.sjoin(gdf, join_gdf, how='left', predicate=join_type)
if right_index_name is not None:
gdf = gdf[gdf_columns + ['index_right']]
gdf.columns = gdf_columns + [right_index_name]
gdf = gdf[gdf_columns + ['index_right']]
gdf.columns = gdf_columns + [join_gdf.index.name]
if dropna:
gdf = gdf[~gdf[join_gdf.index.name].isna()]
gdf[join_gdf.index.name] = gdf[join_gdf.index.name].astype(
join_gdf.index.dtype,
)
return gdf


def filter_ids(df, query, message):
    """Filter a dataframe with a pandas query, reporting row counts before and after.

    Parameters
    ----------
    df: DataFrame
    query: str Pandas query string selecting the records to keep
    message: str Plain-language description of the query, printed before filtering

    Returns
    -------
    DataFrame
    """
    print(message)
    n_before = len(df)
    retained = df.query(query)
    n_after = len(retained)
    n_dropped = n_before - n_after
    # Summarise how many records the query removed and how many remain.
    summary = (
        f' {n_dropped} sample points discarded, '
        f'leaving {n_after} remaining.'
    )
    print(summary)
    return retained


def create_pdna_net(gdf_nodes, gdf_edges, predistance=500):
"""Create pandana network to prepare for calculating the accessibility to destinations. The network is comprised of a set of nodes and edges.

Expand Down