fixed the issue where some line-segments overlapped, causing their in…

…tersections to be lines, which would create gaps in the network. This retains only their endpoints, which removes these gaps
matsim-melbourne · Feb 5, 2024 · 1dc4fec · 1dc4fec
1 parent b6bfd0a
commit 1dc4fec
Show file tree

Hide file tree

Showing 2 changed files with 365 additions and 0 deletions.
diff --git a/network.sql b/network.sql
@@ -0,0 +1,275 @@
+-- turning timing on
+\timing
+-- transforms the geometries to a projected (i.e, x,y) system and snaps to the
+-- nearest metre. Was using GDA2020 (7845), now using MGA Zone 55 (28355)
+
+-- in case the road has multi-lineStrings
+DROP TABLE IF EXISTS roads_split;
+CREATE TABLE roads_split AS
+SELECT ogc_fid, osm_id, highway, other_tags,
+       (ST_Dump(geom)).geom AS geom
+FROM roads;
+
+
+ALTER TABLE roads_split
+ ALTER COLUMN geom TYPE geometry(LineString,:v1)
+  USING ST_SnapToGrid(ST_Transform(geom,:v1),1);
+ALTER TABLE roads_points
+ ALTER COLUMN geom TYPE geometry(Point,:v1)
+  USING ST_SnapToGrid(ST_Transform(geom,:v1),1);
+CREATE INDEX roads_points_gix ON roads USING GIST (geom);
+
+-- determine if the road segment is a bridge or tunnel
+ALTER TABLE roads ADD COLUMN bridge_or_tunnel BOOLEAN;
+UPDATE roads
+  SET bridge_or_tunnel =
+       CASE WHEN other_tags LIKE '%bridge%' OR other_tags LIKE '%tunnel%' THEN TRUE
+       ELSE FALSE END;
+CREATE INDEX roads_gix ON roads USING GIST (geom);
+
+-- find the bridge-bridge or road-road intersections
+DROP TABLE IF EXISTS line_intersections;
+CREATE TABLE line_intersections AS
+SELECT a.osm_id AS osm_id_a, b.osm_id AS osm_id_b,
+  ST_Intersection(a.geom,b.geom) AS geom
+FROM roads AS a, roads AS b
+WHERE a.osm_id < b.osm_id AND
+  a.bridge_or_tunnel = b.bridge_or_tunnel AND
+  ST_Intersects(a.geom, b.geom) = TRUE;
+
+
+-- fixing the cases where the line intersections end up as edges. Here we
+-- store their individual endpoints
+DROP TABLE IF EXISTS line_intersections2;
+CREATE TABLE line_intersections2 AS
+  SELECT osm_id_a, osm_id_b, geom
+  FROM line_intersections
+  WHERE ST_GeometryType(geom) = 'ST_Point'
+UNION
+  SELECT osm_id_a, osm_id_b, ST_StartPoint(geom) AS geom
+  FROM line_intersections
+  WHERE ST_GeometryType(geom) = 'ST_LineString'
+UNION
+  SELECT osm_id_a, osm_id_b, ST_EndPoint(geom) AS geom
+  FROM line_intersections
+  WHERE ST_GeometryType(geom) = 'ST_LineString';
+
+
+
+-- group the intersections by osm_id
+DROP TABLE IF EXISTS line_intersections_grouped;
+CREATE TABLE line_intersections_grouped AS
+SELECT c.osm_id, st_unaryunion(st_collect(c.geom)) AS geom
+FROM
+ (SELECT a.osm_id_a AS osm_id, a.geom
+  FROM line_intersections as a
+  UNION
+  SELECT b.osm_id_b AS osm_id, b.geom
+  FROM line_intersections AS b) AS c
+GROUP BY osm_id;
+
+-- take the intersections, buffer them 0.01m, and use them to cut the lines they
+-- intersect. We then snap to the nearest metre, ensuring there are no gaps.
+-- Only intersections with the same osm_id need to be considered
+DROP TABLE IF EXISTS line_cut;
+CREATE TABLE line_cut AS
+SELECT a.osm_id,
+(ST_Dump(ST_SnapToGrid(ST_Difference(a.geom,ST_Buffer(b.geom,0.01)),1))).geom AS geom
+FROM roads_split AS a, line_intersections_grouped AS b
+WHERE a.osm_id = b.osm_id;
+
+-- all the osm_ids currently processed. Some segments don't have any
+-- intersections so they will need to be added. Adding an index here to speedup
+-- processing
+DROP TABLE IF EXISTS unique_ids;
+CREATE TABLE unique_ids AS
+SELECT DISTINCT osm_id
+FROM line_cut;
+CREATE UNIQUE INDEX osm_id_idx ON unique_ids (osm_id);
+
+-- adding the remaining road segments
+INSERT INTO line_cut
+SELECT a.osm_id, a.geom
+FROM roads_split AS a,
+ (SELECT osm_id
+  FROM roads_split
+  EXCEPT
+  SELECT osm_id
+  FROM unique_ids) AS b
+WHERE a.osm_id = b.osm_id;
+CREATE INDEX line_cut_gix ON line_cut USING GIST (geom);
+ALTER TABLE line_cut ADD COLUMN lid SERIAL PRIMARY KEY;
+
+-- find all of the road segment endpoints, including the new ones we've added
+-- from the intersections
+DROP TABLE IF EXISTS endpoints;
+CREATE TABLE endpoints AS
+SELECT ST_StartPoint(a.geom) as geom
+FROM line_cut as a
+UNION
+SELECT ST_EndPoint(b.geom) as geom
+FROM line_cut as b;
+CREATE INDEX endpoints_gix ON endpoints USING GIST (geom);
+
+-- cluster the endpoints to assign each unique endpoint location an id
+DROP TABLE IF EXISTS endpoints_clustered;
+CREATE TABLE endpoints_clustered AS
+SELECT (ST_Dump(a.geom)).geom AS geom
+FROM
+ (SELECT ST_Union(geom) AS geom
+  FROM endpoints) AS a;
+ALTER TABLE endpoints_clustered ADD COLUMN id SERIAL PRIMARY KEY;
+CREATE INDEX endpoints_clustered_gix ON endpoints_clustered USING GIST (geom);
+
+-- most lines will only have 2 endpoints near them, but some will have extra
+DROP TABLE IF EXISTS endpoints_near_lines;
+CREATE TABLE endpoints_near_lines AS
+SELECT c.lid, c.num_endpoints, c.geom
+FROM
+ (SELECT a.lid, COUNT(b.id) AS num_endpoints,
+         st_unaryunion(st_collect(b.geom)) AS geom
+  FROM
+    line_cut AS a,
+    endpoints_clustered AS b
+  WHERE
+    st_intersects(st_buffer(a.geom,0.1),b.geom)
+  GROUP BY
+    a.lid
+  ) AS c
+WHERE
+  c.num_endpoints > 2;
+
+DROP TABLE IF EXISTS line_cut2;
+CREATE TABLE line_cut2 AS
+SELECT a.lid, a.osm_id,
+(ST_Dump(ST_SnapToGrid(ST_Difference(a.geom,ST_Buffer(b.geom,0.01)),1))).geom AS geom
+FROM line_cut AS a, endpoints_near_lines AS b
+WHERE a.lid = b.lid;
+
+DELETE FROM line_cut 
+WHERE lid IN 
+  (SELECT lid FROM line_cut2);
+
+
+DROP TABLE IF EXISTS line_cut3;
+CREATE TABLE line_cut3 AS
+	SELECT osm_id, geom
+	FROM line_cut
+	WHERE lid NOT IN (SELECT lid FROM line_cut2)
+UNION
+	SELECT osm_id, geom
+	FROM line_cut2;
+	-- add length
+ALTER TABLE line_cut3 ADD COLUMN length INTEGER;
+UPDATE line_cut3 SET length = ST_Length(geom);
+
+-- add from and to id columns
+ALTER TABLE line_cut3 ADD COLUMN from_id INTEGER;
+ALTER TABLE line_cut3 ADD COLUMN to_id INTEGER;
+
+-- assign the from and to ids to the road segments
+UPDATE line_cut3 AS a
+  SET from_id = b.id
+FROM
+  endpoints_clustered as b
+WHERE
+  ST_Intersects(ST_StartPoint(a.geom),b.geom);
+
+UPDATE line_cut3 AS a
+  SET to_id = b.id
+FROM
+  endpoints_clustered as b
+WHERE
+  ST_Intersects(ST_EndPoint(a.geom),b.geom);
+
+DELETE FROM line_cut3
+WHERE ST_isEmpty(geom);
+
+
+-- This doesn't seem to be necessary, but will keep this in in case we do need
+-- it for other networks. It finds all the unique ids used by the line_cut3
+-- table and builds an index on it.
+DROP TABLE IF EXISTS unique_node_ids;
+CREATE TABLE unique_node_ids AS
+SELECT DISTINCT c.id
+FROM
+ (SELECT DISTINCT a.from_id AS id
+  FROM line_cut3 AS a
+  UNION
+  SELECT DISTINCT b.to_id AS id
+  FROM line_cut3 AS b) as c;
+CREATE UNIQUE INDEX unique_node_ids_idx ON unique_node_ids (id);
+
+-- filters endpoints_clustered to only have the nodes used in line_cut3
+DROP TABLE IF EXISTS endpoints_filtered;
+CREATE TABLE endpoints_filtered AS
+SELECT a.id, a.geom
+FROM endpoints_clustered AS a,
+  unique_node_ids AS b
+WHERE
+  a.id = b.id;
+CREATE INDEX endpoints_filtered_gix ON endpoints_filtered USING GIST (geom);
+
+-- the non-spatial data for the osm_id entries present in the network
+DROP TABLE IF EXISTS osm_metadata;
+CREATE TABLE osm_metadata AS
+SELECT osm_id, highway, other_tags
+FROM roads
+WHERE osm_id IN
+ (SELECT DISTINCT osm_id FROM line_cut3);
+
+-- from and to ids of edges that are roundabouts
+DROP TABLE IF EXISTS edges_roundabout;
+CREATE TABLE edges_roundabout AS
+SELECT a.from_id,
+  a.to_id,
+  CASE WHEN b.other_tags LIKE '%roundabout%' THEN TRUE
+                                             ELSE FALSE
+       END AS is_roundabout
+FROM
+  line_cut3 AS a,
+  osm_metadata AS b
+WHERE
+  a.osm_id = b.osm_id;
+
+-- node ids that connect to roundabouts
+DROP TABLE IF EXISTS nodes_roundabout;
+CREATE TABLE nodes_roundabout AS
+SELECT DISTINCT c.id
+FROM
+ (SELECT DISTINCT a.from_id AS id
+		FROM edges_roundabout AS a
+		WHERE is_roundabout = TRUE
+	UNION
+		SELECT DISTINCT b.to_id AS id
+		FROM edges_roundabout AS b
+		WHERE is_roundabout = TRUE) AS c;
+
+-- nodes attributed with if they are at roundabouts or traffic signals 
+DROP TABLE IF EXISTS nodes_attributed;
+CREATE TABLE nodes_attributed AS
+SELECT c.id,
+       CASE WHEN c.id IN (SELECT id FROM nodes_roundabout) THEN 1
+                                                           ELSE 0
+       END AS is_roundabout,
+       CASE WHEN c.length <= 20 THEN 1
+                                ELSE 0
+       END AS is_signal,
+       c.geom
+FROM
+ (SELECT
+		a.id,
+		a.geom,
+		ST_Distance(a.geom,b.geom) AS length
+	FROM
+		endpoints_filtered AS a
+	CROSS JOIN LATERAL
+		(SELECT geom
+		 FROM roads_points
+		 ORDER BY
+		   a.geom <-> geom
+		 LIMIT 1) AS b
+	) AS c;
+
+CREATE INDEX nodes_attributed_gix ON nodes_attributed USING GIST (geom);
+
diff --git a/processOSM.sh b/processOSM.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+extract=$1
+crs=$2
+output=$3
+
+#extract='./data/melbourne_australia.osm'
+#crs=28355
+#output='./data/network3.sqlite'
+
+# change to the directory this script is located in
+cd "$(dirname "$0")"
+# extract the roads from the osm file, put in temp.sqlite
+ogr2ogr -update -overwrite -nln roads -f "SQLite" -dsco SPATIALITE=YES \
+  -dialect SQLite -sql \
+  "SELECT CAST(osm_id AS DOUBLE PRECISION) AS osm_id, highway, other_tags, \
+    GEOMETRY FROM lines \
+    WHERE (highway IS NOT NULL AND \
+      NOT highway = 'bridleway' AND \
+      NOT highway = 'bus_stop' AND \
+      NOT highway = 'co' AND \
+      NOT highway = 'platform' AND \
+      NOT highway = 'raceway' AND \
+      NOT highway = 'services' AND \
+      NOT highway = 'traffic_island' AND \
+      highway NOT LIKE '%construction%' AND \
+      highway NOT LIKE '%proposed%' AND \
+      highway NOT LIKE '%disused%' AND \
+      highway NOT LIKE '%abandoned%') AND \
+      (other_tags IS NULL OR
+       (other_tags NOT LIKE '%busbar%' AND \
+        other_tags NOT LIKE '%abandoned%' AND \
+        other_tags NOT LIKE '%\"access\"=>\"private\"%')) " \
+  ./data/temp.sqlite $extract
+#      highway NOT LIKE '%service%' AND \
+# Removed since some service roads are used as footpaths (e.g., Royal Exhibition
+# building)
+
+# bridleway can be used for walking and cycling (provided you give way to horses
+# they are more common in the UK.
+
+# extract the traffic signals, put in temp.sqlite
+ogr2ogr -update -overwrite -nln roads_points -f "SQLite" -dsco SPATIALITE=YES \
+  -dialect SQLite -sql \
+  "SELECT CAST(osm_id AS DOUBLE PRECISION) AS osm_id, highway, other_tags, \
+    GEOMETRY FROM points \
+    WHERE highway LIKE '%traffic_signals%' " \
+  ./data/temp.sqlite $extract
+
+# extract the train and tram lines and add to temp.sqlite
+# apparently there are miniature railways
+ogr2ogr -update -overwrite -nln pt -f "SQLite" -dialect SQLite -sql \
+  "SELECT CAST(osm_id AS DOUBLE PRECISION) AS osm_id, highway, other_tags, \
+    GEOMETRY FROM lines \
+    WHERE other_tags LIKE '%railway%' AND \
+      other_tags NOT LIKE '%busbar%' AND \
+      other_tags NOT LIKE '%abandoned%' AND \
+      other_tags NOT LIKE '%miniature%' AND \
+      other_tags NOT LIKE '%proposed%' AND \
+      other_tags NOT LIKE '%disused%' AND \
+      other_tags NOT LIKE '%preserved%' AND \
+      other_tags NOT LIKE '%construction%' AND \
+      other_tags NOT LIKE '%\"service\"=>\"yard\"%'" \
+  ./data/temp.sqlite $extract
+
+# the postgres database name.
+DB_NAME="network_test"
+
+# Delete the database if it already exists
+COMMAND="psql -U postgres -c 'DROP DATABASE ${DB_NAME}' postgres"
+eval $COMMAND
+# Create the database and add the postgis extension
+createdb -U postgres ${DB_NAME}
+psql -c 'create extension postgis' ${DB_NAME} postgres
+
+ogr2ogr -overwrite -lco GEOMETRY_NAME=geom -lco SCHEMA=public -f "PostgreSQL" \
+  PG:"host=localhost port=5432 user=postgres dbname=${DB_NAME}" \
+  -a_srs "EPSG:4326" ./data/temp.sqlite roads
+ogr2ogr -overwrite -lco GEOMETRY_NAME=geom -lco SCHEMA=public -f "PostgreSQL" \
+  PG:"host=localhost port=5432 user=postgres dbname=${DB_NAME}" \
+  -a_srs "EPSG:4326" ./data/temp.sqlite roads_points
+
+# run the sql statements
+psql -U postgres -d ${DB_NAME} -a -f network.sql -v v1="$crs"
+
+# extract the nodes, edges, and osm metadata to the network file
+ogr2ogr -update -overwrite -f SQLite -dsco SPATIALITE=yes $output PG:"dbname=${DB_NAME} user=postgres" public.line_cut3 -nln edges
+ogr2ogr -update -overwrite -f SQLite -update $output PG:"dbname=${DB_NAME} user=postgres" public.nodes_attributed -nln nodes
+ogr2ogr -update -overwrite -f SQLite -update $output PG:"dbname=${DB_NAME} user=postgres" public.osm_metadata -nln osm_metadata
+