Skip to content

Commit

Permalink
Dynamic sizing, begin work on docker support
Browse files Browse the repository at this point in the history
  • Loading branch information
tongning committed Sep 8, 2019
1 parent ce15659 commit 83ca59c
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 26 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.vscode
tohme_crops
*.pyc
old
Expand Down
8 changes: 8 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Scraper image: Python 2 environment on Ubuntu 16.04 with the native
# libraries needed by the pip requirements (freetype/xft for matplotlib,
# jpeg for Pillow, BLAS/LAPACK/ATLAS + gfortran for numpy/scipy, Tk for
# python-tk backends).
FROM ubuntu:16.04

WORKDIR /app

# Install system packages in a single layer and clean the apt lists so the
# package index does not bloat the image. This expensive layer is cached
# across source-code changes because it runs before any COPY of the tree.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        python-pip python-dev python-tk \
        libfreetype6-dev libxft-dev libjpeg8-dev \
        libblas-dev liblapack-dev libatlas-base-dev gfortran && \
    rm -rf /var/lib/apt/lists/*

# Copy only requirements.txt first so code-only edits do not invalidate the
# (slow) pip install layer.
COPY requirements.txt /app/
RUN pip install -r requirements.txt

# Now bring in the rest of the source tree.
COPY . /app

# Arguments passed to `docker run` are appended after DownloadRunner.py
# (the script expects: sidewalk_server_domain storage_path).
ENTRYPOINT ["python", "DownloadRunner.py"]
CMD []
112 changes: 86 additions & 26 deletions DownloadRunner.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
# !/usr/bin/python

# ****************************************
# Specify the file storage location below
# ****************************************
storage_location = "/home/anthony/Downloads/test/"
# !/usr/bin/python2

from SidewalkDB import *
from sys import argv

import os
import httplib
Expand All @@ -20,17 +16,51 @@
from random import shuffle
import fnmatch

from subprocess import PIPE,STDOUT

try:
from xml.etree import cElementTree as ET
except ImportError, e:
from xml.etree import ElementTree as ET

# Delay between successive tile downloads, in milliseconds (the download
# loops convert it with float(delay) / 1000 before calling sleep).
delay = 30

# Command-line interface: exactly two positional arguments are required.
if len(argv) != 3:
    print("Usage: python DownloadRunner.py sidewalk_server_domain storage_path")
    print("    sidewalk_server_domain - FDQN of SidewalkWebpage server to fetch pano list from")
    print("    storage_path - location to store scraped panos")
    print("    Example: python DownloadRunner.py sidewalk-sea.cs.washington.edu /destination/path")
    exit(0)

# Server queried for the pano-id list, and local destination for the scrape.
sidewalk_server_fqdn = argv[1]
storage_location = argv[2]

# Create the destination directory on first run.
# NOTE(review): os.mkdir creates only the leaf directory and raises if the
# parent path is missing — confirm the parent always exists, or consider
# os.makedirs.
if not os.path.exists(storage_location):
    os.mkdir(storage_location)

print("Starting run with pano list fetched from %s and destination path %s" % (sidewalk_server_fqdn, storage_location))

def check_download_failed_previously(panoId):
    """Return True if panoId appears anywhere in the scrape.log file.

    The log is treated as one plain-text blob; a pano is considered to have
    failed before if its id occurs as a substring of the log contents.

    :param panoId: panorama id string to look for.
    :return: True if the id is present in scrape.log, False otherwise
        (including when no scrape.log exists yet).
    """
    # Use a context manager so the log handle is always closed (the original
    # called open() without ever closing the file object).
    try:
        with open('scrape.log') as log_file:
            return panoId in log_file.read()
    except IOError:
        # No scrape.log yet (e.g. first run) -> nothing has failed before.
        # IOError rather than FileNotFoundError keeps Python 2 compatibility.
        return False

def extract_panowidthheight(path_to_metadata_xml):
    """Read a panorama metadata XML file and return its pixel dimensions.

    Scans the XML root's children for a 'data_properties' element and reads
    its image_width / image_height attributes.

    :param path_to_metadata_xml: path to the pano's metadata .xml file.
    :return: (image_width, image_height) tuple of ints.
    :raises KeyError: if no data_properties element with width/height
        attributes is present.
    """
    # Parse straight from the path: ET.parse opens and closes the file
    # itself, fixing the file handle the original version left open.
    tree = ET.parse(path_to_metadata_xml)
    root = tree.getroot()

    # Last data_properties element wins, matching the original loop.
    properties = {}
    for child in root:
        if child.tag == 'data_properties':
            properties = child.attrib

    return (int(properties['image_width']), int(properties['image_height']))


def fetch_pano_ids_from_webserver():
unique_ids = []
conn = httplib.HTTPSConnection("sidewalk.umiacs.umd.edu")
conn = httplib.HTTPSConnection(sidewalk_server_fqdn)
conn.request("GET", "/adminapi/labels/panoid")
r1 = conn.getresponse()
data = r1.read()
Expand All @@ -39,7 +69,11 @@ def fetch_pano_ids_from_webserver():

for value in jsondata["features"]:
if value["properties"]["gsv_panorama_id"] not in unique_ids:
unique_ids.append(value["properties"]["gsv_panorama_id"])
#Check if the pano_id is an empty string
if value["properties"]["gsv_panorama_id"]:
unique_ids.append(value["properties"]["gsv_panorama_id"])
else:
print "Pano ID is an empty string"
return unique_ids


Expand All @@ -53,19 +87,25 @@ def download_panorama_images(storage_path, pano_list=None):

counter = 0
failed = 0
im_dimension = (512 * 26, 512 * 13)
blank_image = Image.new('RGBA', im_dimension, (0, 0, 0, 0))

base_url = 'http://maps.google.com/cbk?'
shuffle(unique_ids)
for pano_id in unique_ids:

pano_xml_path = os.path.join(storage_path, pano_id[:2], pano_id + ".xml")
if not os.path.isfile(pano_xml_path):
continue
(image_width,image_height) = extract_panowidthheight(pano_xml_path)
im_dimension = (image_width, image_height)
blank_image = Image.new('RGBA', im_dimension, (0, 0, 0, 0))

print '-- Extracting images for', pano_id,

destination_dir = os.path.join(storage_path, pano_id[:2])
if not os.path.isdir(destination_dir):
os.makedirs(destination_dir)

filename = pano_id + ".jpg"

out_image_name = os.path.join(destination_dir, filename)
if os.path.isfile(out_image_name):
print 'File already exists.'
Expand All @@ -74,23 +114,30 @@ def download_panorama_images(storage_path, pano_list=None):
print 'Completed ' + str(counter) + ' of ' + str(len(unique_ids))
continue

for y in range(13):
for x in range(26):
for y in range(image_height / 512):
for x in range(image_width / 512):
url_param = 'output=tile&zoom=5&x=' + str(x) + '&y=' + str(
y) + '&cb_client=maps_sv&fover=2&onerr=3&renderer=spherical&v=4&panoid=' + pano_id
url = base_url + url_param

# Open an image, resize it to 512x512, and paste it into a canvas
req = urllib.urlopen(url)
file = cStringIO.StringIO(req.read())
im = Image.open(file)
im = im.resize((512, 512))
try:
im = Image.open(file)
im = im.resize((512, 512))
except Exception:
print 'Error. Image.open didnt work here for some reason'
print url
print y,x
print req
print pano_id

blank_image.paste(im, (512 * x, 512 * y))

# Wait a little bit so you don't get blocked by Google
# sleep_in_milliseconds = float(delay) / 1000
# sleep(sleep_in_milliseconds)
sleep_in_milliseconds = float(delay) / 1000
sleep(sleep_in_milliseconds)
print '.',
print

Expand Down Expand Up @@ -119,8 +166,8 @@ def download_panorama_images(storage_path, pano_list=None):
temp_blank_image.paste(im, (512 * x, 512 * y))

# Wait a little bit so you don't get blocked by Google
# sleep_in_milliseconds = float(delay) / 1000
# sleep(sleep_in_milliseconds)
sleep_in_milliseconds = float(delay) / 1000
sleep(sleep_in_milliseconds)
print '.',
print
temp_blank_image = temp_blank_image.resize(im_dimension, Image.ANTIALIAS) # resize
Expand Down Expand Up @@ -161,10 +208,14 @@ def download_panorama_depthdata(storage_path, decode=True, pano_list=None):
continue

url = base_url + pano_id
with open(destination_file, 'wb') as f:
req = urllib2.urlopen(url)
for line in req:
f.write(line)
try:
with open(destination_file, 'wb') as f:
req = urllib2.urlopen(url)
for line in req:
f.write(line)
except:
print 'Unable to download depth data for pano.'
continue

print 'Done.'

Expand All @@ -184,11 +235,20 @@ def generate_depthmapfiles(path_to_scrapes):

# Generate a .depth.txt file for the .xml file
output_file = os.path.join(root, pano_id + ".depth.txt")
call(["./decode_depthmap", xml_location, output_file])
if os.path.isfile(output_file):
print 'Depth file already exists'
continue

output_code = call(["./decode_depthmap", xml_location, output_file])
if output_code == 0:
print 'Succesfully converted ',pano_id,' to depth.txt'
else:
print 'Unsuccessful. Could not convert ',pano_id,' to depth.txt . Returned with error ',output_code

# --- Script entry point -----------------------------------------------------
# Pipeline: fetch the pano-id list from the web server, download panorama
# images and depth data for each id, then decode depth maps to .depth.txt.

print "Fetching pano-ids"
pano_list = fetch_pano_ids_from_webserver()

# NOTE(review): download_panorama_images is invoked twice below — this looks
# like diff residue (the first call is the pre-change position of the line);
# confirm only one call is intended before running, or every pano will be
# processed twice.
download_panorama_images(storage_location, pano_list=pano_list)  # Trailing slash required
# pano_list = [pano_list[111], pano_list[112]]
print "Fetching Panoramas"
download_panorama_depthdata(storage_location, pano_list=pano_list)
download_panorama_images(storage_location, pano_list=pano_list)  # Trailing slash required
generate_depthmapfiles(storage_location)

0 comments on commit 83ca59c

Please sign in to comment.