diff --git a/.gitignore b/.gitignore index d9787eb..1e95114 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ tohme_crops *.pyc old *.swp +id_rsa venv crops/* *.log diff --git a/Dockerfile b/Dockerfile index 3fb7994..42e30a7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,9 @@ -FROM ubuntu:16.04 +FROM ubuntu:18.04 +ARG DEBIAN_FRONTEND=noninteractive COPY . /app WORKDIR /app RUN apt-get update -RUN apt-get install -y python-pip libfreetype6-dev libxft-dev python-dev libjpeg8-dev libblas-dev liblapack-dev libatlas-base-dev gfortran python-tk +RUN apt-get install -y python-pip sshfs libfreetype6-dev libxft-dev python-dev libjpeg8-dev libblas-dev liblapack-dev libatlas-base-dev gfortran python-tk RUN pip install -r requirements.txt -ENTRYPOINT ["python", "DownloadRunner.py"] +ENTRYPOINT ["./DownloadRunnerDockerEntrypoint.sh"] CMD [] \ No newline at end of file diff --git a/DownloadRunner.py b/DownloadRunner.py index 3b5fc5f..29d6adf 100644 --- a/DownloadRunner.py +++ b/DownloadRunner.py @@ -33,7 +33,7 @@ def __getattr__(self, name): DownloadResult = Enum(('skipped', 'success', 'fallback_success', 'failure')) -delay = 30 +delay = 0 if len(argv) != 3: print("Usage: python DownloadRunner.py sidewalk_server_domain storage_path") @@ -128,8 +128,12 @@ def download_panorama_images(storage_path, pano_list): def download_single_pano(storage_path, pano_id): base_url = 'http://maps.google.com/cbk?' pano_xml_path = os.path.join(storage_path, pano_id[:2], pano_id + ".xml") - - (image_width,image_height) = extract_panowidthheight(pano_xml_path) + image_width = 16384 + image_height = 8192 + try: + (image_width,image_height) = extract_panowidthheight(pano_xml_path) + except Exception as e: + print("IMAGEDOWNLOAD - WARN - using fallback pano size for %s" % (pano_id)) im_dimension = (image_width, image_height) blank_image = Image.new('RGBA', im_dimension, (0, 0, 0, 0)) @@ -278,7 +282,7 @@ def generate_depthmapfiles(path_to_scrapes): pano_list = fetch_pano_ids_from_webserver() ##### Debug Line - remove for prod ########## -pano_list = [pano_list[111], pano_list[112]] +# pano_list = [pano_list[111], pano_list[112]] ############################################# print "Fetching Panoramas" diff --git a/DownloadRunnerDockerEntrypoint.sh b/DownloadRunnerDockerEntrypoint.sh new file mode 100755 index 0000000..6ed249d --- /dev/null +++ b/DownloadRunnerDockerEntrypoint.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# ./DownloadRunnerDockerEntrypoint sidewalk_server_fqdn +# ./DownloadRunnerDockerEntrypoint sidewalk_server_fqdn user@host:/remote/path port + +mkdir -p /tmp/download_dest +chmod 600 /app/id_rsa +if [ $# -eq 1 ]; then + python DownloadRunner.py $1 /tmp/download_dest +elif [ $# -eq 3 ]; then + echo "Mounting $2 port $3 for $1" + sshfs -o IdentityFile=/app/id_rsa,StrictHostKeyChecking=no $2 /tmp/download_dest -p $3 && python DownloadRunner.py $1 /tmp/download_dest; umount /tmp/download_dest +else + echo "Usage:" + echo " ./DownloadRunnerDockerEntrypoint sidewalk_server_fqdn" + echo " ./DownloadRunnerDockerEntrypoint sidewalk_server_fqdn user@host:/remote/path port" +fi \ No newline at end of file diff --git a/kubernetes.yaml b/kubernetes.yaml new file mode 100644 index 0000000..488c1cb --- /dev/null +++ b/kubernetes.yaml @@ -0,0 +1,33 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: prodall3 +spec: + completions: 9 + parallelism: 9 + template: + metadata: + annotations: + container.apparmor.security.beta.kubernetes.io/scraper: unconfined + spec: + containers: + - name: scraper + resources: + requests: + memory: "3000Mi" + cpu: "0.5" + limits: + cpu: "0.8" + image: gcr.io/t-pulsar-252617/panoscraper:v1 + command: ["/app/DownloadRunnerDockerEntrypoint.sh", "sidewalk-sea.cs.washington.edu", "antli@128.8.132.187:/fs/jonf-sidewalk/Panoramas/scrapes_dump_seattle", "22"] + volumeMounts: + - mountPath: /dev/fuse + name: dev-fuse + securityContext: + privileged: true + volumes: + - name: dev-fuse + hostPath: + path: /dev/fuse + restartPolicy: Never + backoffLimit: 4 \ No newline at end of file