-
Notifications
You must be signed in to change notification settings - Fork 0
/
claude
69 lines (55 loc) · 2.61 KB
/
claude
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import os
def process_zipcodes(exposure_data_path, gfk_zipcodes_path, output_folder):
    """
    Process zipcodes by identifying missing zipcodes and assigning nearest zipcodes.

    Every exposure row whose zipcode does not occur in the GFK layer is
    treated as "missing". Two CSV files are written into ``output_folder``:

    - ``missing_zipcodes.csv``: the exposure rows with a missing zipcode
      (geometry column dropped).
    - ``nearest_zipcodes.csv``: the same rows plus a ``nearest_zipcode``
      column holding the zipcode of the GFK geometry closest to the row's
      point (``None`` when no distance could be computed).

    A three-line summary is printed; the function returns ``None``.

    Parameters:
    - exposure_data_path: Path to the exposure data CSV/Excel file. The table
      must provide ``zipcode``, ``lon`` and ``lat`` columns. Paths ending in
      ``.xls``, ``.xlsx`` or ``.xlsm`` are read with ``pandas.read_excel``;
      anything else is read with ``pandas.read_csv``.
    - gfk_zipcodes_path: Path to the GFK zipcodes shapefile; the layer must
      provide a ``zipcode`` attribute.
    - output_folder: Folder to save output files (created if it does not exist)
    """
    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Read exposure data. Only real paths are inspected for an Excel suffix so
    # that anything else read_csv accepts keeps working exactly as before.
    extension = ''
    if isinstance(exposure_data_path, (str, os.PathLike)):
        extension = os.path.splitext(os.fspath(exposure_data_path))[1].lower()
    if extension in ('.xls', '.xlsx', '.xlsm'):
        exposure_df = pd.read_excel(exposure_data_path)
    else:
        exposure_df = pd.read_csv(exposure_data_path)

    # Read GFK zipcodes
    gfk_gdf = gpd.read_file(gfk_zipcodes_path)

    # Create a GeoDataFrame from exposure data.
    # NOTE(review): the lon/lat points are labelled with the GFK layer's CRS
    # without any reprojection. That is only right if the GFK layer uses the
    # same coordinate system as the lon/lat columns -- confirm, and reproject
    # with to_crs() if the shapefile is in a projected CRS.
    exposure_gdf = gpd.GeoDataFrame(
        exposure_df,
        geometry=gpd.points_from_xy(exposure_df.lon, exposure_df.lat),
        crs=gfk_gdf.crs,
    )

    # Zipcodes in exposure data but not in GFK zipcodes.
    # NOTE(review): the set difference compares raw values, so an integer
    # zipcode column on one side and a string column on the other would make
    # every zipcode look missing -- verify both columns share a dtype.
    exposure_zipcodes = set(exposure_df['zipcode'])
    gfk_zipcodes = set(gfk_gdf['zipcode'])
    missing_zipcodes = exposure_zipcodes - gfk_zipcodes

    # Filter exposure data for missing zipcodes. The explicit copy makes this
    # an independent frame, so adding a column below does not write into a
    # slice of exposure_gdf (avoids pandas' SettingWithCopyWarning).
    is_missing = exposure_gdf['zipcode'].isin(missing_zipcodes)
    missing_zipcodes_df = exposure_gdf[is_missing].copy()

    # Export missing zipcodes data
    missing_zipcodes_path = os.path.join(output_folder, 'missing_zipcodes.csv')
    missing_zipcodes_df.drop('geometry', axis=1).to_csv(missing_zipcodes_path, index=False)

    # Loop-invariant: fetch the GFK geometry column once, not once per point.
    gfk_geometries = gfk_gdf.geometry

    def find_nearest_zipcode(point):
        """Return the zipcode of the GFK geometry closest to ``point``."""
        # Calculate distances to all GFK zipcode polygons
        distances = gfk_geometries.distance(point)
        # With no usable distance (empty GFK layer, or every distance is NaN)
        # there is no nearest polygon; argmin would raise or return a
        # meaningless position, so report "no match" instead.
        if not distances.notna().any():
            return None
        # argmin is positional, hence iloc rather than loc.
        return gfk_gdf.iloc[distances.argmin()]['zipcode']

    # Assign nearest zipcodes
    missing_zipcodes_df['nearest_zipcode'] = missing_zipcodes_df['geometry'].apply(find_nearest_zipcode)

    # Export nearest zipcode assignments
    nearest_zipcodes_path = os.path.join(output_folder, 'nearest_zipcodes.csv')
    missing_zipcodes_df.drop('geometry', axis=1).to_csv(nearest_zipcodes_path, index=False)

    # Print summary
    print(f"Total missing zipcodes: {len(missing_zipcodes)}")
    print(f"Missing zipcodes exported to: {missing_zipcodes_path}")
    print(f"Nearest zipcodes exported to: {nearest_zipcodes_path}")
# Example usage
# process_zipcodes(
# exposure_data_path='path/to/exposure_data.csv',
# gfk_zipcodes_path='path/to/gfk_zipcodes.shp',
# output_folder='path/to/output_folder'
# )