Skip to content

Commit

Permalink
add ancient yet simple-looking Python tools to munge Open Street Maps…
Browse files Browse the repository at this point in the history
… data into line docs files
  • Loading branch information
mikemccand committed Dec 7, 2021
1 parent bcfd6e8 commit a78e230
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 0 deletions.
47 changes: 47 additions & 0 deletions src/python/extractOpenStreetMaps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import subprocess
import re

# Input planet dump (bzip2-compressed OSM XML) and the output file, which
# receives one ASCII "id,lat,lon" line per <node> element.
PLANET_BZ2 = 'planet-latest.osm.20160307.bz2'
LATLON_OUT = 'latlon.txt'

# Fast path: id, lat and lon adjacent and in exactly this order.  [^"]* (rather
# than a lazy .*?) stops a group from running across a closing quote and
# swallowing neighbouring attributes; \b keeps id= from matching inside uid=.
r = re.compile(r'\bid="([^"]*)" lat="([^"]*)" lon="([^"]*)"')

# Fallback: look each attribute up on its own so that any attribute order, or
# extra attributes between id and lat, still yields clean values.
attrPatterns = tuple(re.compile(r'\b%s="([^"]*)"' % name)
                     for name in ('id', 'lat', 'lon'))


def nodeFields(line):
    """Return the (id, lat, lon) attribute strings of a ``<node`` line.

    Returns None when any of the three attributes is missing from the line.
    """
    m = r.search(line)
    if m is not None:
        return m.groups()
    found = [pattern.search(line) for pattern in attrPatterns]
    if any(hit is None for hit in found):
        return None
    return tuple(hit.group(1) for hit in found)


# Run bunzip2 directly from an argument list (no shell, no extra `cat`), so
# decompression still happens in its own process and the exit status we see
# is bunzip2's own.
with subprocess.Popen(['bunzip2', '-c', PLANET_BZ2], stdout=subprocess.PIPE) as p, \
        open(LATLON_OUT, 'wb') as fOut:
    lineCount = 0
    nodeCount = 0
    for rawLine in p.stdout:
        l = rawLine.decode('utf-8').strip()
        lineCount += 1
        if lineCount % 100000 == 0:
            # Periodic progress report; the input is very large.
            print('%s: %d, %d nodes...' % (datetime.datetime.now(), lineCount, nodeCount))
        if not l.startswith('<node'):
            continue
        fields = nodeFields(l)
        if fields is None:
            print('match failed: %s' % l)
            continue
        fOut.write(('%s,%s,%s\n' % fields).encode('ascii'))
        nodeCount += 1

# Leaving the with-block has waited for the child.  Fail loudly if bunzip2
# reported an error (missing or corrupt input) instead of silently leaving a
# truncated or empty output file behind.
if p.returncode != 0:
    raise subprocess.CalledProcessError(p.returncode, p.args)

36 changes: 36 additions & 0 deletions src/python/subsetOpenStreetMaps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random

# Latitude/longitude bounding box; every point inside it is kept.  Going by
# the output file name this is presumably the London area.
MIN_LAT = 51.0919106
MAX_LAT = 51.6542719
MIN_LON = -0.3867282
MAX_LON = 0.8492337

# Copy a subset of the "id,lat,lon" lines: every line inside the bounding box,
# plus a 1-in-50 random sample of all lines.
with open('latlon.txt', 'rb') as f, \
        open('latlon.subsetPlusAllLondon.txt', 'wb') as fOut:
    for line in f:
        fields = line.strip().decode('ascii').split(',')
        lat = float(fields[1])
        lon = float(fields[2])
        inBox = MIN_LAT <= lat <= MAX_LAT and MIN_LON <= lon <= MAX_LON
        # The random draw comes first so exactly one randint call is made per
        # input line, whether or not the point is inside the box.
        if random.randint(0, 49) == 17 or inBox:
            # Write the original bytes back unchanged, line ending included.
            fOut.write(line)

0 comments on commit a78e230

Please sign in to comment.