Add script and build workflow for snapshot job

gf-dcc · Jun 20, 2023 · d2c201c · d2c201c
1 parent 631ef6c
commit d2c201c
Show file tree

Hide file tree

Showing 4 changed files with 145 additions and 0 deletions.
diff --git a/.github/workflows/docker-snapshot.yml b/.github/workflows/docker-snapshot.yml
@@ -0,0 +1,61 @@
+name: Dockerize snapshot
+
+# Runs only for changes under snapshot/:
+# - push to main (already vetted in PR to develop): build the image and push it to the registry
+# - pull request to main: build only, to confirm the image can still be built
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - snapshot/**
+  pull_request:
+    branches:
+      - main
+    paths:
+      - snapshot/**
+
+# Use ghcr registry
+env:
+  REGISTRY: ghcr.io
+  # github.repository as <account>/<repo>
+  IMAGE_NAME: ${{ github.repository }}
+
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      # Login against a Docker registry except on PR
+      # (confirm that image can be built, but don't push to registry)
+      # https://github.com/docker/login-action
+      - name: Log into registry ${{ env.REGISTRY }}
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      # Step outputs go through the $GITHUB_OUTPUT file; the `::set-output` command is deprecated
+      - name: Get short hash for tag
+        id: tag
+        run: echo "sha7=$(git rev-parse --short "$GITHUB_SHA")" >> "$GITHUB_OUTPUT"
+
+      # Build and push Docker image with Buildx (don't push on PR)
+      # https://github.com/docker/build-push-action
+      # The image is tagged both `latest` and with the short commit hash from the previous step
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v3
+        with:
+          context: ./snapshot
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-snapshot:latest,${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-snapshot:${{steps.tag.outputs.sha7}}
+          platforms: linux/amd64
diff --git a/snapshot/Dockerfile b/snapshot/Dockerfile
@@ -0,0 +1,6 @@
+# Base image pinned to v2.7.0; presumably provides python3 and the synapseclient package that snapshot.py imports
+FROM sagebionetworks/synapsepythonclient:v2.7.0
+
+# Place the snapshot script at the image root
+COPY snapshot.py /snapshot.py
+
+# Exec-form entrypoint: any arguments given to `docker run` are appended to this command
+ENTRYPOINT ["python3", "/snapshot.py"]
+
diff --git a/snapshot/README.md b/snapshot/README.md
@@ -0,0 +1,29 @@
+## Snapshot
+
+This image is used in a scheduled job that batch-versions selected portal assets (currently tables and views).
+
+### Secrets and env vars
+
+```
+SCHEDULED_JOB_SECRETS={"SYNAPSE_AUTH_TOKEN":"xxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+LABEL=snapshot
+SCHEDULE=weekly
+COMMENT="Scheduled snapshot"
+SLACK=https://hooks.slack.com/services/xxxxxxxxxxxxxxxxxxxxxxxxxxxx
+```
+
+### Example build
+
+`docker build -t ghcr.io/nf-osi/jobs-snapshot .`
+
+See images with `docker image ls`
+
+### Example run 
+
+`docker run --env-file envfile ghcr.io/nf-osi/jobs-snapshot syn27242487 syn27242485`
+
+### To do
+
+- Configure for other app webhooks, i.e. if something else needs to happen after snapshot
+- Better login checks
+
diff --git a/snapshot/snapshot.py b/snapshot/snapshot.py
@@ -0,0 +1,49 @@
+import json, os, synapseclient, requests, traceback, sys
+from datetime import datetime
+
+# Secrets
+secrets = json.loads(os.getenv("SCHEDULED_JOB_SECRETS"))
+auth_token = secrets["SYNAPSE_AUTH_TOKEN"]
+
+# Login
+syn = synapseclient.Synapse()
+syn.login(authToken=auth_token)
+
+# Snapshot is used to version tables, so targets should be ids of tables
+# Instead of hard-coding the list of ids here, ids are stored/updated in `syn51729134` and queried during run
+
+# Query for list of entities to version:
+targets_set_list = ['syn28142805']
+reference = "syn51729134"
+ids = syn.tableQuery(f"SELECT id FROM {reference}").asDataFrame()
+targets = targets_set_list + ids['id'].tolist()
+
+target_comment = os.getenv("COMMENT")
+target_label = datetime.now()
+job_schedule = os.getenv("SCHEDULE")
+job_label = os.getenv("LABEL")
+slack = os.getenv("SLACK") # Slack webhook to send notifications, if given to the job container
+
+print(f"Targets: {targets}")
+
+def slack_report(slack, success:bool, job_schedule, job_label, target, version = ''):
+    if success:
+        txt = ":white_check_mark: " + job_schedule + " - " + job_label + " succeeded, updated to *" + target + "." + str(
+                version) + "* just now."
+    else:
+        txt = ":x: " + job_schedule + " - " + job_label + " failed just now for *" + target + "* :worried:"
+
+    msg = json.dumps({"text": txt})
+    r = requests.post(slack, data=msg, headers={'Content-type': 'application/json'})
+    print(r.status_code)
+
+# Iterate   
+for target in targets:
+    try:
+        version = syn.create_snapshot_version(table=target, comment=target_comment, label=target_label)
+        if slack is not None:
+            slack_report(slack, success=True, job_schedule=job_schedule, job_label=job_label, target=target, version=version)
+    except:
+        traceback.print_exc()
+        if slack is not None:
+            slack_report(slack, success=False, job_schedule=job_schedule, job_label=job_label, target=target)