From 83ca59cb4ecc78818525f5ba0474900687bfb378 Mon Sep 17 00:00:00 2001
From: Anthony Li
Date: Sun, 8 Sep 2019 01:53:09 -0400
Subject: [PATCH] Dynamic sizing, begin work on Docker support

---
 .gitignore        |   1 +
 Dockerfile        |   8 ++++
 DownloadRunner.py | 112 +++++++++++++++++++++++++++++++++++-----------
 3 files changed, 95 insertions(+), 26 deletions(-)
 create mode 100644 Dockerfile

diff --git a/.gitignore b/.gitignore
index 1b60898..d9787eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.vscode
 tohme_crops
 *.pyc
 old
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..3fb7994
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,8 @@
+FROM ubuntu:16.04
+COPY . /app
+WORKDIR /app
+RUN apt-get update
+RUN apt-get install -y python-pip libfreetype6-dev libxft-dev python-dev libjpeg8-dev libblas-dev liblapack-dev libatlas-base-dev gfortran python-tk
+RUN pip install -r requirements.txt
+ENTRYPOINT ["python", "DownloadRunner.py"]
+CMD []
\ No newline at end of file
diff --git a/DownloadRunner.py b/DownloadRunner.py
index 2dd998e..b51f809 100644
--- a/DownloadRunner.py
+++ b/DownloadRunner.py
@@ -1,11 +1,7 @@
-# !/usr/bin/python
-
-# ****************************************
-# Specify the file storage location below
-# ****************************************
-storage_location = "/home/anthony/Downloads/test/"
+# !/usr/bin/python2
 
 from SidewalkDB import *
+from sys import argv
 
 import os
 import httplib
@@ -20,6 +16,29 @@
 from random import shuffle
 import fnmatch
 
+from subprocess import PIPE,STDOUT
+
+try:
+    from xml.etree import cElementTree as ET
+except ImportError, e:
+    from xml.etree import ElementTree as ET
+
+delay = 30
+
+if len(argv) != 3:
+    print("Usage: python DownloadRunner.py sidewalk_server_domain storage_path")
+    print("    sidewalk_server_domain - FQDN of the SidewalkWebpage server to fetch the pano list from")
+    print("    storage_path - location to store the scraped panos")
+    print("    Example: python DownloadRunner.py sidewalk-sea.cs.washington.edu /destination/path")
+    exit(0)
+
+sidewalk_server_fqdn = argv[1]
+storage_location = argv[2]
+
+if not os.path.exists(storage_location):
+    os.mkdir(storage_location)
+
+print("Starting run with pano list fetched from %s and destination path %s" % (sidewalk_server_fqdn, storage_location))
 
 def check_download_failed_previously(panoId):
     if panoId in open('scrape.log').read():
@@ -27,10 +46,21 @@ def check_download_failed_previously(panoId):
     else:
         return False
 
+def extract_panowidthheight(path_to_metadata_xml):
+    pano = {}
+    pano_xml = open(path_to_metadata_xml, 'rb')
+    tree = ET.parse(pano_xml)
+    root = tree.getroot()
+    for child in root:
+        if child.tag == 'data_properties':
+            pano[child.tag] = child.attrib
+
+    return (int(pano['data_properties']['image_width']), int(pano['data_properties']['image_height']))
+
 def fetch_pano_ids_from_webserver():
     unique_ids = []
 
-    conn = httplib.HTTPSConnection("sidewalk.umiacs.umd.edu")
+    conn = httplib.HTTPSConnection(sidewalk_server_fqdn)
     conn.request("GET", "/adminapi/labels/panoid")
     r1 = conn.getresponse()
     data = r1.read()
@@ -39,7 +69,11 @@ def fetch_pano_ids_from_webserver():
 
     for value in jsondata["features"]:
         if value["properties"]["gsv_panorama_id"] not in unique_ids:
-            unique_ids.append(value["properties"]["gsv_panorama_id"])
+            # Check that the pano_id is not an empty string
+            if value["properties"]["gsv_panorama_id"]:
+                unique_ids.append(value["properties"]["gsv_panorama_id"])
+            else:
+                print "Pano ID is an empty string"
 
     return unique_ids
 
@@ -53,11 +87,18 @@ def download_panorama_images(storage_path, pano_list=None):
 
     counter = 0
     failed = 0
-    im_dimension = (512 * 26, 512 * 13)
-    blank_image = Image.new('RGBA', im_dimension, (0, 0, 0, 0))
+
     base_url = 'http://maps.google.com/cbk?'
     shuffle(unique_ids)
     for pano_id in unique_ids:
+
+        pano_xml_path = os.path.join(storage_path, pano_id[:2], pano_id + ".xml")
+        if not os.path.isfile(pano_xml_path):
+            continue
+        (image_width, image_height) = extract_panowidthheight(pano_xml_path)
+        im_dimension = (image_width, image_height)
+        blank_image = Image.new('RGBA', im_dimension, (0, 0, 0, 0))
+
         print '-- Extracting images for', pano_id,
 
         destination_dir = os.path.join(storage_path, pano_id[:2])
@@ -65,7 +106,6 @@ def download_panorama_images(storage_path, pano_list=None):
             os.makedirs(destination_dir)
 
         filename = pano_id + ".jpg"
-
         out_image_name = os.path.join(destination_dir, filename)
         if os.path.isfile(out_image_name):
             print 'File already exists.'
@@ -74,8 +114,8 @@ def download_panorama_images(storage_path, pano_list=None):
             print 'Completed ' + str(counter) + ' of ' + str(len(unique_ids))
             continue
 
-        for y in range(13):
-            for x in range(26):
+        for y in range(image_height / 512):
+            for x in range(image_width / 512):
                 url_param = 'output=tile&zoom=5&x=' + str(x) + '&y=' + str(
                     y) + '&cb_client=maps_sv&fover=2&onerr=3&renderer=spherical&v=4&panoid=' + pano_id
                 url = base_url + url_param
@@ -83,14 +123,21 @@ def download_panorama_images(storage_path, pano_list=None):
 
                 # Open an image, resize it to 512x512, and paste it into a canvas
                 req = urllib.urlopen(url)
                 file = cStringIO.StringIO(req.read())
-                im = Image.open(file)
-                im = im.resize((512, 512))
+                try:
+                    im = Image.open(file)
+                    im = im.resize((512, 512))
+                except Exception:
+                    print 'Error: Image.open failed on tile', x, y
+                    print 'pano_id:', pano_id
+                    print url
+                    print req
+                    continue  # skip this tile rather than pasting a stale image
 
                 blank_image.paste(im, (512 * x, 512 * y))
 
                 # Wait a little bit so you don't get blocked by Google
-                # sleep_in_milliseconds = float(delay) / 1000
-                # sleep(sleep_in_milliseconds)
+                sleep_in_milliseconds = float(delay) / 1000
+                sleep(sleep_in_milliseconds)
             print '.',
         print
@@ -119,8 +166,8 @@ def download_panorama_images(storage_path, pano_list=None):
                 temp_blank_image.paste(im, (512 * x, 512 * y))
 
                 # Wait a little bit so you don't get blocked by Google
-                # sleep_in_milliseconds = float(delay) / 1000
-                # sleep(sleep_in_milliseconds)
+                sleep_in_milliseconds = float(delay) / 1000
+                sleep(sleep_in_milliseconds)
             print '.',
         print
         temp_blank_image = temp_blank_image.resize(im_dimension, Image.ANTIALIAS)  # resize
@@ -161,10 +208,14 @@ def download_panorama_depthdata(storage_path, decode=True, pano_list=None):
             continue
 
         url = base_url + pano_id
 
-        with open(destination_file, 'wb') as f:
-            req = urllib2.urlopen(url)
-            for line in req:
-                f.write(line)
+        try:
+            with open(destination_file, 'wb') as f:
+                req = urllib2.urlopen(url)
+                for line in req:
+                    f.write(line)
+        except Exception:
+            print 'Unable to download depth data for pano', pano_id
+            continue
 
         print 'Done.'
@@ -184,11 +235,20 @@ def generate_depthmapfiles(path_to_scrapes):
 
             # Generate a .depth.txt file for the .xml file
             output_file = os.path.join(root, pano_id + ".depth.txt")
-            call(["./decode_depthmap", xml_location, output_file])
+            if os.path.isfile(output_file):
+                print 'Depth file already exists'
+                continue
+            output_code = call(["./decode_depthmap", xml_location, output_file])
+            if output_code == 0:
+                print 'Successfully converted', pano_id, 'to depth.txt'
+            else:
+                print 'Unsuccessful. Could not convert', pano_id, 'to depth.txt. Returned with error', output_code
 
 
+print "Fetching pano-ids"
 pano_list = fetch_pano_ids_from_webserver()
-
-download_panorama_images(storage_location, pano_list=pano_list) # Trailing slash required
+# pano_list = [pano_list[111], pano_list[112]]
+print "Fetching Panoramas"
 download_panorama_depthdata(storage_location, pano_list=pano_list)
+download_panorama_images(storage_location, pano_list=pano_list) # Trailing slash required
 generate_depthmapfiles(storage_location)
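
The core of the dynamic-sizing change above is that each panorama's true pixel dimensions are now read from its metadata XML (the image_width/image_height attributes of the data_properties element) and the tile loops cover a (width / 512) x (height / 512) grid instead of the old hardcoded 26x13 one. A minimal standalone sketch of that calculation, assuming the same XML layout the patch's extract_panowidthheight() parses (tile_grid is a hypothetical helper, not part of the patch, and the example dimensions are illustrative):

    from xml.etree import ElementTree as ET

    def tile_grid(path_to_metadata_xml):
        # <data_properties image_width="..." image_height="..."> carries the
        # stitched panorama's pixel dimensions.
        root = ET.parse(path_to_metadata_xml).getroot()
        props = root.find('data_properties').attrib
        width = int(props['image_width'])
        height = int(props['image_height'])
        # Each tile fetched at zoom=5 is 512x512, so the x/y download loops
        # run over a (width / 512) by (height / 512) grid.
        return (width // 512, height // 512)

    # A 13312x6656 panorama gives a 26x13 grid -- exactly the shape the old
    # hardcoded im_dimension = (512 * 26, 512 * 13) assumed for every pano.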
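
Because the new Dockerfile sets ENTRYPOINT to ["python", "DownloadRunner.py"] and leaves CMD empty, any arguments passed after the image name on the docker run command line become the script's two positional arguments. A build-and-run invocation might therefore look like the following sketch (the image tag and host-side storage path are placeholders; the server FQDN is the example one from the usage text above):

    docker build -t sidewalk-panorama .
    docker run -v /host/panos:/panos sidewalk-panorama sidewalk-sea.cs.washington.edu /panos

Mounting a host directory over the container-side storage path keeps the scraped panos after the container exits.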