From 52cabea206ef398ef57803963c32f885310460a1 Mon Sep 17 00:00:00 2001 From: Di Wu Date: Fri, 1 Nov 2019 13:35:24 -0500 Subject: [PATCH] feat(original dockerfile for nde) --- jupyter-daids/Dockerfile | 15 + jupyter-daids/cull_idle_servers.py | 367 +++++++++++++++++++++++ jupyter-daids/jupyter_notebook_config.py | 8 + jupyter-dait/Dockerfile | 17 ++ jupyter-dait/cull_idle_servers.py | 367 +++++++++++++++++++++++ jupyter-dait/jupyter_notebook_config.py | 8 + jupyter-dmid/Dockerfile | 18 ++ jupyter-dmid/cull_idle_servers.py | 367 +++++++++++++++++++++++ jupyter-dmid/jupyter_notebook_config.py | 8 + rstudio-inflammatory/Dockerfile | 13 + 10 files changed, 1188 insertions(+) create mode 100644 jupyter-daids/Dockerfile create mode 100755 jupyter-daids/cull_idle_servers.py create mode 100644 jupyter-daids/jupyter_notebook_config.py create mode 100644 jupyter-dait/Dockerfile create mode 100755 jupyter-dait/cull_idle_servers.py create mode 100644 jupyter-dait/jupyter_notebook_config.py create mode 100644 jupyter-dmid/Dockerfile create mode 100755 jupyter-dmid/cull_idle_servers.py create mode 100644 jupyter-dmid/jupyter_notebook_config.py create mode 100644 rstudio-inflammatory/Dockerfile diff --git a/jupyter-daids/Dockerfile b/jupyter-daids/Dockerfile new file mode 100644 index 00000000..e76e74f0 --- /dev/null +++ b/jupyter-daids/Dockerfile @@ -0,0 +1,15 @@ +FROM quay.io/cdis/jupyter-notebook:latest + +USER root + +# COPY premade notebook +RUN mkdir /home/jovyan/daids-notebook + +ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DAIDS_notebook/CHARLIE_demo.ipynb /home/jovyan/daids-notebook/ +ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DAIDS_notebook/ndh_analysis_functions.py /home/jovyan/daids-notebook/ + + +RUN chmod -R 777 /home/jovyan/daids-notebook + +USER $NB_UID +WORKDIR /home/jovyan diff --git a/jupyter-daids/cull_idle_servers.py b/jupyter-daids/cull_idle_servers.py new file mode 100755 index 00000000..451b8abf --- /dev/null 
+++ b/jupyter-daids/cull_idle_servers.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 +"""script to monitor and cull idle single-user servers + +Caveats: + +last_activity is not updated with high frequency, +so cull timeout should be greater than the sum of: + +- single-user websocket ping interval (default: 30s) +- JupyterHub.last_activity_interval (default: 5 minutes) + +You can run this as a service managed by JupyterHub with this in your config:: + + + c.JupyterHub.services = [ + { + 'name': 'cull-idle', + 'admin': True, + 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'], + } + ] + +Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`: + + export JUPYTERHUB_API_TOKEN=$(jupyterhub token) + python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api] + +This script uses the same ``--timeout`` and ``--max-age`` values for +culling users and users' servers. If you want a different value for +users and servers, you should add this script to the services list +twice, just with different ``name``s, different values, and one with +the ``--cull-users`` option. 
+""" + +from datetime import datetime, timezone +from functools import partial +import json +import os + +try: + from urllib.parse import quote +except ImportError: + from urllib import quote + +import dateutil.parser + +from tornado.gen import coroutine, multi +from tornado.locks import Semaphore +from tornado.log import app_log +from tornado.httpclient import AsyncHTTPClient, HTTPRequest +from tornado.ioloop import IOLoop, PeriodicCallback +from tornado.options import define, options, parse_command_line + + +def parse_date(date_string): + """Parse a timestamp + + If it doesn't have a timezone, assume utc + + Returned datetime object will always be timezone-aware + """ + dt = dateutil.parser.parse(date_string) + if not dt.tzinfo: + # assume naïve timestamps are UTC + dt = dt.replace(tzinfo=timezone.utc) + return dt + + +def format_td(td): + """ + Nicely format a timedelta object + + as HH:MM:SS + """ + if td is None: + return "unknown" + if isinstance(td, str): + return td + seconds = int(td.total_seconds()) + h = seconds // 3600 + seconds = seconds % 3600 + m = seconds // 60 + seconds = seconds % 60 + return "{h:02}:{m:02}:{seconds:02}".format(h=h, m=m, seconds=seconds) + + +@coroutine +def cull_idle(url, api_token, inactive_limit, cull_users=False, max_age=0, concurrency=10): + """Shutdown idle single-user servers + + If cull_users, inactive *users* will be deleted as well. 
+ """ + auth_header = { + 'Authorization': 'token %s' % api_token, + } + req = HTTPRequest( + url=url + '/users', + headers=auth_header, + ) + now = datetime.now(timezone.utc) + client = AsyncHTTPClient() + + if concurrency: + semaphore = Semaphore(concurrency) + @coroutine + def fetch(req): + """client.fetch wrapped in a semaphore to limit concurrency""" + yield semaphore.acquire() + try: + return (yield client.fetch(req)) + finally: + yield semaphore.release() + else: + fetch = client.fetch + + resp = yield fetch(req) + users = json.loads(resp.body.decode('utf8', 'replace')) + futures = [] + + @coroutine + def handle_server(user, server_name, server): + """Handle (maybe) culling a single server + + Returns True if server is now stopped (user removable), + False otherwise. + """ + log_name = user['name'] + if server_name: + log_name = '%s/%s' % (user['name'], server_name) + if server.get('pending'): + app_log.warning( + "Not culling server %s with pending %s", + log_name, server['pending']) + return False + + # jupyterhub < 0.9 defined 'server.url' once the server was ready + # as an *implicit* signal that the server was ready. + # 0.9 adds a dedicated, explicit 'ready' field. + # By current (0.9) definitions, servers that have no pending + # events and are not ready shouldn't be in the model, + # but let's check just to be safe. 
+ + if not server.get('ready', bool(server['url'])): + app_log.warning( + "Not culling not-ready not-pending server %s: %s", + log_name, server) + return False + + if server.get('started'): + age = now - parse_date(server['started']) + else: + # started may be undefined on jupyterhub < 0.9 + age = None + + # check last activity + # last_activity can be None in 0.9 + if server['last_activity']: + inactive = now - parse_date(server['last_activity']) + else: + # no activity yet, use start date + # last_activity may be None with jupyterhub 0.9, + # which introduces the 'started' field which is never None + # for running servers + inactive = age + + should_cull = (inactive is not None and + inactive.total_seconds() >= inactive_limit) + if should_cull: + app_log.info( + "Culling server %s (inactive for %s)", + log_name, format_td(inactive)) + + if max_age and not should_cull: + # only check started if max_age is specified + # so that we can still be compatible with jupyterhub 0.8 + # which doesn't define the 'started' field + if age is not None and age.total_seconds() >= max_age: + app_log.info( + "Culling server %s (age: %s, inactive for %s)", + log_name, format_td(age), format_td(inactive)) + should_cull = True + + if not should_cull: + app_log.debug( + "Not culling server %s (age: %s, inactive for %s)", + log_name, format_td(age), format_td(inactive)) + return False + + if server_name: + # culling a named server + delete_url = url + "/users/%s/servers/%s" % ( + quote(user['name']), quote(server['name']) + ) + else: + delete_url = url + '/users/%s/server' % quote(user['name']) + + req = HTTPRequest( + url=delete_url, method='DELETE', headers=auth_header, + ) + resp = yield fetch(req) + if resp.code == 202: + app_log.warning( + "Server %s is slow to stop", + log_name, + ) + # return False to prevent culling user with pending shutdowns + return False + return True + + @coroutine + def handle_user(user): + """Handle one user. 
+ + Create a list of their servers, and async exec them. Wait for + that to be done, and if all servers are stopped, possibly cull + the user. + """ + # shutdown servers first. + # Hub doesn't allow deleting users with running servers. + # jupyterhub 0.9 always provides a 'servers' model. + # 0.8 only does this when named servers are enabled. + if 'servers' in user: + servers = user['servers'] + else: + # jupyterhub < 0.9 without named servers enabled. + # create servers dict with one entry for the default server + # from the user model. + # only if the server is running. + servers = {} + if user['server']: + servers[''] = { + 'last_activity': user['last_activity'], + 'pending': user['pending'], + 'url': user['server'], + } + server_futures = [ + handle_server(user, server_name, server) + for server_name, server in servers.items() + ] + results = yield multi(server_futures) + if not cull_users: + return + # some servers are still running, cannot cull users + still_alive = len(results) - sum(results) + if still_alive: + app_log.debug( + "Not culling user %s with %i servers still alive", + user['name'], still_alive) + return False + + should_cull = False + if user.get('created'): + age = now - parse_date(user['created']) + else: + # created may be undefined on jupyterhub < 0.9 + age = None + + # check last activity + # last_activity can be None in 0.9 + if user['last_activity']: + inactive = now - parse_date(user['last_activity']) + else: + # no activity yet, use start date + # last_activity may be None with jupyterhub 0.9, + # which introduces the 'created' field which is never None + inactive = age + + should_cull = (inactive is not None and + inactive.total_seconds() >= inactive_limit) + if should_cull: + app_log.info( + "Culling user %s (inactive for %s)", + user['name'], inactive) + + if max_age and not should_cull: + # only check created if max_age is specified + # so that we can still be compatible with jupyterhub 0.8 + # which doesn't define the 'started' 
field + if age is not None and age.total_seconds() >= max_age: + app_log.info( + "Culling user %s (age: %s, inactive for %s)", + user['name'], format_td(age), format_td(inactive)) + should_cull = True + + if not should_cull: + app_log.debug( + "Not culling user %s (created: %s, last active: %s)", + user['name'], format_td(age), format_td(inactive)) + return False + + req = HTTPRequest( + url=url + '/users/%s' % user['name'], + method='DELETE', + headers=auth_header, + ) + yield fetch(req) + return True + + for user in users: + futures.append((user['name'], handle_user(user))) + + for (name, f) in futures: + try: + result = yield f + except Exception: + app_log.exception("Error processing %s", name) + else: + if result: + app_log.debug("Finished culling %s", name) + + +if __name__ == '__main__': + define( + 'url', + default=os.environ.get('JUPYTERHUB_API_URL'), + help="The JupyterHub API URL", + ) + define('timeout', default=600, help="The idle timeout (in seconds)") + define('cull_every', default=0, + help="The interval (in seconds) for checking for idle servers to cull") + define('max_age', default=0, + help="The maximum age (in seconds) of servers that should be culled even if they are active") + define('cull_users', default=False, + help="""Cull users in addition to servers. + This is for use in temporary-user cases such as tmpnb.""", + ) + define('concurrency', default=10, + help="""Limit the number of concurrent requests made to the Hub. + + Deleting a lot of users at the same time can slow down the Hub, + so limit the number of API requests we have outstanding at any given time. 
+ """ + ) + + parse_command_line() + if not options.cull_every: + options.cull_every = options.timeout // 2 + api_token = os.environ['JUPYTERHUB_API_TOKEN'] + + try: + AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") + except ImportError as e: + app_log.warning( + "Could not load pycurl: %s\n" + "pycurl is recommended if you have a large number of users.", + e) + + loop = IOLoop.current() + cull = partial( + cull_idle, + url=options.url, + api_token=api_token, + inactive_limit=options.timeout, + cull_users=options.cull_users, + max_age=options.max_age, + concurrency=options.concurrency, + ) + # schedule first cull immediately + # because PeriodicCallback doesn't start until the end of the first interval + loop.add_callback(cull) + # schedule periodic cull + pc = PeriodicCallback(cull, 1e3 * options.cull_every) + pc.start() + try: + loop.start() + except KeyboardInterrupt: + pass diff --git a/jupyter-daids/jupyter_notebook_config.py b/jupyter-daids/jupyter_notebook_config.py new file mode 100644 index 00000000..a25ab206 --- /dev/null +++ b/jupyter-daids/jupyter_notebook_config.py @@ -0,0 +1,8 @@ +import os + +c.NotebookApp.tornado_settings = { + 'headers': { + 'Content-Security-Policy': + 'frame-ancestors self %s' % os.getenv('FRAME_ANCESTORS', '') + } +} \ No newline at end of file diff --git a/jupyter-dait/Dockerfile b/jupyter-dait/Dockerfile new file mode 100644 index 00000000..f09f19d9 --- /dev/null +++ b/jupyter-dait/Dockerfile @@ -0,0 +1,17 @@ +FROM quay.io/cdis/jupyter-notebook:latest + +USER root + +# COPY premade notebook +RUN mkdir /home/jovyan/DAIT-notebook + +ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DAIT_notebook/DAIT_lipids_demo.ipynb /home/jovyan/DAIT-notebook/ +ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DAIT_notebook/Microbiome.ipynb /home/jovyan/DAIT-notebook/ +ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DAIT_notebook/microbiome.R 
/home/jovyan/DAIT-notebook/ +ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DAIT_notebook/ndh_analysis_functions.py /home/jovyan/DAIT-notebook/ + + +RUN chmod -R 777 /home/jovyan/DAIT-notebook + +USER $NB_UID +WORKDIR /home/jovyan diff --git a/jupyter-dait/cull_idle_servers.py b/jupyter-dait/cull_idle_servers.py new file mode 100755 index 00000000..451b8abf --- /dev/null +++ b/jupyter-dait/cull_idle_servers.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 +"""script to monitor and cull idle single-user servers + +Caveats: + +last_activity is not updated with high frequency, +so cull timeout should be greater than the sum of: + +- single-user websocket ping interval (default: 30s) +- JupyterHub.last_activity_interval (default: 5 minutes) + +You can run this as a service managed by JupyterHub with this in your config:: + + + c.JupyterHub.services = [ + { + 'name': 'cull-idle', + 'admin': True, + 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'], + } + ] + +Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`: + + export JUPYTERHUB_API_TOKEN=$(jupyterhub token) + python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api] + +This script uses the same ``--timeout`` and ``--max-age`` values for +culling users and users' servers. If you want a different value for +users and servers, you should add this script to the services list +twice, just with different ``name``s, different values, and one with +the ``--cull-users`` option. 
+""" + +from datetime import datetime, timezone +from functools import partial +import json +import os + +try: + from urllib.parse import quote +except ImportError: + from urllib import quote + +import dateutil.parser + +from tornado.gen import coroutine, multi +from tornado.locks import Semaphore +from tornado.log import app_log +from tornado.httpclient import AsyncHTTPClient, HTTPRequest +from tornado.ioloop import IOLoop, PeriodicCallback +from tornado.options import define, options, parse_command_line + + +def parse_date(date_string): + """Parse a timestamp + + If it doesn't have a timezone, assume utc + + Returned datetime object will always be timezone-aware + """ + dt = dateutil.parser.parse(date_string) + if not dt.tzinfo: + # assume naïve timestamps are UTC + dt = dt.replace(tzinfo=timezone.utc) + return dt + + +def format_td(td): + """ + Nicely format a timedelta object + + as HH:MM:SS + """ + if td is None: + return "unknown" + if isinstance(td, str): + return td + seconds = int(td.total_seconds()) + h = seconds // 3600 + seconds = seconds % 3600 + m = seconds // 60 + seconds = seconds % 60 + return "{h:02}:{m:02}:{seconds:02}".format(h=h, m=m, seconds=seconds) + + +@coroutine +def cull_idle(url, api_token, inactive_limit, cull_users=False, max_age=0, concurrency=10): + """Shutdown idle single-user servers + + If cull_users, inactive *users* will be deleted as well. 
+ """ + auth_header = { + 'Authorization': 'token %s' % api_token, + } + req = HTTPRequest( + url=url + '/users', + headers=auth_header, + ) + now = datetime.now(timezone.utc) + client = AsyncHTTPClient() + + if concurrency: + semaphore = Semaphore(concurrency) + @coroutine + def fetch(req): + """client.fetch wrapped in a semaphore to limit concurrency""" + yield semaphore.acquire() + try: + return (yield client.fetch(req)) + finally: + yield semaphore.release() + else: + fetch = client.fetch + + resp = yield fetch(req) + users = json.loads(resp.body.decode('utf8', 'replace')) + futures = [] + + @coroutine + def handle_server(user, server_name, server): + """Handle (maybe) culling a single server + + Returns True if server is now stopped (user removable), + False otherwise. + """ + log_name = user['name'] + if server_name: + log_name = '%s/%s' % (user['name'], server_name) + if server.get('pending'): + app_log.warning( + "Not culling server %s with pending %s", + log_name, server['pending']) + return False + + # jupyterhub < 0.9 defined 'server.url' once the server was ready + # as an *implicit* signal that the server was ready. + # 0.9 adds a dedicated, explicit 'ready' field. + # By current (0.9) definitions, servers that have no pending + # events and are not ready shouldn't be in the model, + # but let's check just to be safe. 
+ + if not server.get('ready', bool(server['url'])): + app_log.warning( + "Not culling not-ready not-pending server %s: %s", + log_name, server) + return False + + if server.get('started'): + age = now - parse_date(server['started']) + else: + # started may be undefined on jupyterhub < 0.9 + age = None + + # check last activity + # last_activity can be None in 0.9 + if server['last_activity']: + inactive = now - parse_date(server['last_activity']) + else: + # no activity yet, use start date + # last_activity may be None with jupyterhub 0.9, + # which introduces the 'started' field which is never None + # for running servers + inactive = age + + should_cull = (inactive is not None and + inactive.total_seconds() >= inactive_limit) + if should_cull: + app_log.info( + "Culling server %s (inactive for %s)", + log_name, format_td(inactive)) + + if max_age and not should_cull: + # only check started if max_age is specified + # so that we can still be compatible with jupyterhub 0.8 + # which doesn't define the 'started' field + if age is not None and age.total_seconds() >= max_age: + app_log.info( + "Culling server %s (age: %s, inactive for %s)", + log_name, format_td(age), format_td(inactive)) + should_cull = True + + if not should_cull: + app_log.debug( + "Not culling server %s (age: %s, inactive for %s)", + log_name, format_td(age), format_td(inactive)) + return False + + if server_name: + # culling a named server + delete_url = url + "/users/%s/servers/%s" % ( + quote(user['name']), quote(server['name']) + ) + else: + delete_url = url + '/users/%s/server' % quote(user['name']) + + req = HTTPRequest( + url=delete_url, method='DELETE', headers=auth_header, + ) + resp = yield fetch(req) + if resp.code == 202: + app_log.warning( + "Server %s is slow to stop", + log_name, + ) + # return False to prevent culling user with pending shutdowns + return False + return True + + @coroutine + def handle_user(user): + """Handle one user. 
+ + Create a list of their servers, and async exec them. Wait for + that to be done, and if all servers are stopped, possibly cull + the user. + """ + # shutdown servers first. + # Hub doesn't allow deleting users with running servers. + # jupyterhub 0.9 always provides a 'servers' model. + # 0.8 only does this when named servers are enabled. + if 'servers' in user: + servers = user['servers'] + else: + # jupyterhub < 0.9 without named servers enabled. + # create servers dict with one entry for the default server + # from the user model. + # only if the server is running. + servers = {} + if user['server']: + servers[''] = { + 'last_activity': user['last_activity'], + 'pending': user['pending'], + 'url': user['server'], + } + server_futures = [ + handle_server(user, server_name, server) + for server_name, server in servers.items() + ] + results = yield multi(server_futures) + if not cull_users: + return + # some servers are still running, cannot cull users + still_alive = len(results) - sum(results) + if still_alive: + app_log.debug( + "Not culling user %s with %i servers still alive", + user['name'], still_alive) + return False + + should_cull = False + if user.get('created'): + age = now - parse_date(user['created']) + else: + # created may be undefined on jupyterhub < 0.9 + age = None + + # check last activity + # last_activity can be None in 0.9 + if user['last_activity']: + inactive = now - parse_date(user['last_activity']) + else: + # no activity yet, use start date + # last_activity may be None with jupyterhub 0.9, + # which introduces the 'created' field which is never None + inactive = age + + should_cull = (inactive is not None and + inactive.total_seconds() >= inactive_limit) + if should_cull: + app_log.info( + "Culling user %s (inactive for %s)", + user['name'], inactive) + + if max_age and not should_cull: + # only check created if max_age is specified + # so that we can still be compatible with jupyterhub 0.8 + # which doesn't define the 'started' 
field + if age is not None and age.total_seconds() >= max_age: + app_log.info( + "Culling user %s (age: %s, inactive for %s)", + user['name'], format_td(age), format_td(inactive)) + should_cull = True + + if not should_cull: + app_log.debug( + "Not culling user %s (created: %s, last active: %s)", + user['name'], format_td(age), format_td(inactive)) + return False + + req = HTTPRequest( + url=url + '/users/%s' % user['name'], + method='DELETE', + headers=auth_header, + ) + yield fetch(req) + return True + + for user in users: + futures.append((user['name'], handle_user(user))) + + for (name, f) in futures: + try: + result = yield f + except Exception: + app_log.exception("Error processing %s", name) + else: + if result: + app_log.debug("Finished culling %s", name) + + +if __name__ == '__main__': + define( + 'url', + default=os.environ.get('JUPYTERHUB_API_URL'), + help="The JupyterHub API URL", + ) + define('timeout', default=600, help="The idle timeout (in seconds)") + define('cull_every', default=0, + help="The interval (in seconds) for checking for idle servers to cull") + define('max_age', default=0, + help="The maximum age (in seconds) of servers that should be culled even if they are active") + define('cull_users', default=False, + help="""Cull users in addition to servers. + This is for use in temporary-user cases such as tmpnb.""", + ) + define('concurrency', default=10, + help="""Limit the number of concurrent requests made to the Hub. + + Deleting a lot of users at the same time can slow down the Hub, + so limit the number of API requests we have outstanding at any given time. 
+ """ + ) + + parse_command_line() + if not options.cull_every: + options.cull_every = options.timeout // 2 + api_token = os.environ['JUPYTERHUB_API_TOKEN'] + + try: + AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") + except ImportError as e: + app_log.warning( + "Could not load pycurl: %s\n" + "pycurl is recommended if you have a large number of users.", + e) + + loop = IOLoop.current() + cull = partial( + cull_idle, + url=options.url, + api_token=api_token, + inactive_limit=options.timeout, + cull_users=options.cull_users, + max_age=options.max_age, + concurrency=options.concurrency, + ) + # schedule first cull immediately + # because PeriodicCallback doesn't start until the end of the first interval + loop.add_callback(cull) + # schedule periodic cull + pc = PeriodicCallback(cull, 1e3 * options.cull_every) + pc.start() + try: + loop.start() + except KeyboardInterrupt: + pass diff --git a/jupyter-dait/jupyter_notebook_config.py b/jupyter-dait/jupyter_notebook_config.py new file mode 100644 index 00000000..a25ab206 --- /dev/null +++ b/jupyter-dait/jupyter_notebook_config.py @@ -0,0 +1,8 @@ +import os + +c.NotebookApp.tornado_settings = { + 'headers': { + 'Content-Security-Policy': + 'frame-ancestors self %s' % os.getenv('FRAME_ANCESTORS', '') + } +} \ No newline at end of file diff --git a/jupyter-dmid/Dockerfile b/jupyter-dmid/Dockerfile new file mode 100644 index 00000000..98fb2117 --- /dev/null +++ b/jupyter-dmid/Dockerfile @@ -0,0 +1,18 @@ +FROM quay.io/cdis/jupyter-notebook:latest + +USER root + +# COPY premade notebook +RUN mkdir /home/jovyan/DMID-notebook + +ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DMID_notebook/CalU3_TypeI_ISG.txt /home/jovyan/DMID-notebook/ +ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DMID_notebook/DMID-LHV-R.ipynb /home/jovyan/DMID-notebook/ +ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DMID_notebook/LHV_demo.ipynb /home/jovyan/DMID-notebook/ 
+ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DMID_notebook/ndh_analysis_function_dmid.r /home/jovyan/DMID-notebook/ +ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/DMID_notebook/ndh_analysis_functions_dmid.py /home/jovyan/DMID-notebook/ + + +RUN chmod -R 777 /home/jovyan/DMID-notebook + +USER $NB_UID +WORKDIR /home/jovyan diff --git a/jupyter-dmid/cull_idle_servers.py b/jupyter-dmid/cull_idle_servers.py new file mode 100755 index 00000000..451b8abf --- /dev/null +++ b/jupyter-dmid/cull_idle_servers.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 +"""script to monitor and cull idle single-user servers + +Caveats: + +last_activity is not updated with high frequency, +so cull timeout should be greater than the sum of: + +- single-user websocket ping interval (default: 30s) +- JupyterHub.last_activity_interval (default: 5 minutes) + +You can run this as a service managed by JupyterHub with this in your config:: + + + c.JupyterHub.services = [ + { + 'name': 'cull-idle', + 'admin': True, + 'command': [sys.executable, 'cull_idle_servers.py', '--timeout=3600'], + } + ] + +Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`: + + export JUPYTERHUB_API_TOKEN=$(jupyterhub token) + python3 cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api] + +This script uses the same ``--timeout`` and ``--max-age`` values for +culling users and users' servers. If you want a different value for +users and servers, you should add this script to the services list +twice, just with different ``name``s, different values, and one with +the ``--cull-users`` option. 
+""" + +from datetime import datetime, timezone +from functools import partial +import json +import os + +try: + from urllib.parse import quote +except ImportError: + from urllib import quote + +import dateutil.parser + +from tornado.gen import coroutine, multi +from tornado.locks import Semaphore +from tornado.log import app_log +from tornado.httpclient import AsyncHTTPClient, HTTPRequest +from tornado.ioloop import IOLoop, PeriodicCallback +from tornado.options import define, options, parse_command_line + + +def parse_date(date_string): + """Parse a timestamp + + If it doesn't have a timezone, assume utc + + Returned datetime object will always be timezone-aware + """ + dt = dateutil.parser.parse(date_string) + if not dt.tzinfo: + # assume naïve timestamps are UTC + dt = dt.replace(tzinfo=timezone.utc) + return dt + + +def format_td(td): + """ + Nicely format a timedelta object + + as HH:MM:SS + """ + if td is None: + return "unknown" + if isinstance(td, str): + return td + seconds = int(td.total_seconds()) + h = seconds // 3600 + seconds = seconds % 3600 + m = seconds // 60 + seconds = seconds % 60 + return "{h:02}:{m:02}:{seconds:02}".format(h=h, m=m, seconds=seconds) + + +@coroutine +def cull_idle(url, api_token, inactive_limit, cull_users=False, max_age=0, concurrency=10): + """Shutdown idle single-user servers + + If cull_users, inactive *users* will be deleted as well. 
+ """ + auth_header = { + 'Authorization': 'token %s' % api_token, + } + req = HTTPRequest( + url=url + '/users', + headers=auth_header, + ) + now = datetime.now(timezone.utc) + client = AsyncHTTPClient() + + if concurrency: + semaphore = Semaphore(concurrency) + @coroutine + def fetch(req): + """client.fetch wrapped in a semaphore to limit concurrency""" + yield semaphore.acquire() + try: + return (yield client.fetch(req)) + finally: + yield semaphore.release() + else: + fetch = client.fetch + + resp = yield fetch(req) + users = json.loads(resp.body.decode('utf8', 'replace')) + futures = [] + + @coroutine + def handle_server(user, server_name, server): + """Handle (maybe) culling a single server + + Returns True if server is now stopped (user removable), + False otherwise. + """ + log_name = user['name'] + if server_name: + log_name = '%s/%s' % (user['name'], server_name) + if server.get('pending'): + app_log.warning( + "Not culling server %s with pending %s", + log_name, server['pending']) + return False + + # jupyterhub < 0.9 defined 'server.url' once the server was ready + # as an *implicit* signal that the server was ready. + # 0.9 adds a dedicated, explicit 'ready' field. + # By current (0.9) definitions, servers that have no pending + # events and are not ready shouldn't be in the model, + # but let's check just to be safe. 
+
+        if not server.get('ready', bool(server['url'])):
+            app_log.warning(
+                "Not culling not-ready not-pending server %s: %s",
+                log_name, server)
+            return False
+
+        if server.get('started'):
+            age = now - parse_date(server['started'])
+        else:
+            # started may be undefined on jupyterhub < 0.9
+            age = None
+
+        # check last activity
+        # last_activity can be None in 0.9
+        if server['last_activity']:
+            inactive = now - parse_date(server['last_activity'])
+        else:
+            # no activity yet, use start date
+            # last_activity may be None with jupyterhub 0.9,
+            # which introduces the 'started' field which is never None
+            # for running servers
+            inactive = age
+
+        should_cull = (inactive is not None and
+                       inactive.total_seconds() >= inactive_limit)
+        if should_cull:
+            app_log.info(
+                "Culling server %s (inactive for %s)",
+                log_name, format_td(inactive))
+
+        if max_age and not should_cull:
+            # only check started if max_age is specified
+            # so that we can still be compatible with jupyterhub 0.8
+            # which doesn't define the 'started' field
+            if age is not None and age.total_seconds() >= max_age:
+                app_log.info(
+                    "Culling server %s (age: %s, inactive for %s)",
+                    log_name, format_td(age), format_td(inactive))
+                should_cull = True
+
+        if not should_cull:
+            app_log.debug(
+                "Not culling server %s (age: %s, inactive for %s)",
+                log_name, format_td(age), format_td(inactive))
+            return False
+
+        if server_name:
+            # culling a named server
+            delete_url = url + "/users/%s/servers/%s" % (
+                quote(user['name']), quote(server['name'])
+            )
+        else:
+            delete_url = url + '/users/%s/server' % quote(user['name'])
+
+        req = HTTPRequest(
+            url=delete_url, method='DELETE', headers=auth_header,
+        )
+        resp = yield fetch(req)
+        if resp.code == 202:
+            app_log.warning(
+                "Server %s is slow to stop",
+                log_name,
+            )
+            # return False to prevent culling user with pending shutdowns
+            return False
+        return True
+
+    @coroutine
+    def handle_user(user):
+        """Handle one user.
+
+        Run handle_server on each of the user's servers and wait for all of
+        them.  If cull_users is set and every server stopped, delete the user
+        when idle or too old.  Return True if deleted, else False or None.
+        """
+        # shutdown servers first.
+        # Hub doesn't allow deleting users with running servers.
+        # jupyterhub 0.9 always provides a 'servers' model.
+        # 0.8 only does this when named servers are enabled.
+        if 'servers' in user:
+            servers = user['servers']
+        else:
+            # jupyterhub < 0.9 without named servers enabled.
+            # create servers dict with one entry for the default server
+            # from the user model.
+            # only if the server is running.
+            servers = {}
+            if user['server']:
+                servers[''] = {
+                    'last_activity': user['last_activity'],
+                    'pending': user['pending'],
+                    'url': user['server'],
+                }
+        server_futures = [
+            handle_server(user, server_name, server)
+            for server_name, server in servers.items()
+        ]
+        results = yield multi(server_futures)
+        if not cull_users:
+            return
+        # some servers are still running, cannot cull users
+        still_alive = len(results) - sum(results)
+        if still_alive:
+            app_log.debug(
+                "Not culling user %s with %i servers still alive",
+                user['name'], still_alive)
+            return False
+
+        should_cull = False
+        if user.get('created'):
+            age = now - parse_date(user['created'])
+        else:
+            # created may be undefined on jupyterhub < 0.9
+            age = None
+
+        # check last activity
+        # last_activity can be None in 0.9
+        if user['last_activity']:
+            inactive = now - parse_date(user['last_activity'])
+        else:
+            # no activity yet, use start date
+            # last_activity may be None with jupyterhub 0.9,
+            # which introduces the 'created' field which is never None
+            inactive = age
+
+        should_cull = (inactive is not None and
+                       inactive.total_seconds() >= inactive_limit)
+        if should_cull:
+            app_log.info(
+                "Culling user %s (inactive for %s)",
+                user['name'], format_td(inactive))
+
+        if max_age and not should_cull:
+            # only check created if max_age is specified
+            # so that we can still be compatible with jupyterhub 0.8
+            # which doesn't define the 'created' field
+            if age is not None and age.total_seconds() >= max_age:
+                app_log.info(
+                    "Culling user %s (age: %s, inactive for %s)",
+                    user['name'], format_td(age), format_td(inactive))
+                should_cull = True
+
+        if not should_cull:
+            app_log.debug(
+                "Not culling user %s (age: %s, inactive for %s)",
+                user['name'], format_td(age), format_td(inactive))
+            return False
+
+        req = HTTPRequest(
+            url=url + '/users/%s' % quote(user['name']),
+            method='DELETE',
+            headers=auth_header,
+        )
+        yield fetch(req)
+        return True
+
+    for user in users:
+        futures.append((user['name'], handle_user(user)))
+
+    for (name, f) in futures:
+        try:
+            result = yield f
+        except Exception:
+            app_log.exception("Error processing %s", name)
+        else:
+            if result:
+                app_log.debug("Finished culling %s", name)
+
+
+if __name__ == '__main__':
+    define(
+        'url',
+        default=os.environ.get('JUPYTERHUB_API_URL'),
+        help="The JupyterHub API URL",
+    )
+    define('timeout', default=600, help="The idle timeout (in seconds)")
+    define('cull_every', default=0,
+           help="The interval (in seconds) for checking for idle servers to cull")
+    define('max_age', default=0,
+           help="The maximum age (in seconds) of servers that should be culled even if they are active")
+    define('cull_users', default=False,
+           help="""Cull users in addition to servers.
+                This is for use in temporary-user cases such as tmpnb.""",
+           )
+    define('concurrency', default=10,
+           help="""Limit the number of concurrent requests made to the Hub.
+
+                Deleting a lot of users at the same time can slow down the Hub,
+                so limit the number of API requests we have outstanding at any given time.
+                """
+           )
+
+    parse_command_line()
+    if not options.cull_every:
+        options.cull_every = options.timeout // 2
+    api_token = os.environ['JUPYTERHUB_API_TOKEN']
+
+    try:
+        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
+    except ImportError as e:
+        app_log.warning(
+            "Could not load pycurl: %s\n"
+            "pycurl is recommended if you have a large number of users.",
+            e)
+
+    loop = IOLoop.current()
+    cull = partial(
+        cull_idle,
+        url=options.url,
+        api_token=api_token,
+        inactive_limit=options.timeout,
+        cull_users=options.cull_users,
+        max_age=options.max_age,
+        concurrency=options.concurrency,
+    )
+    # schedule first cull immediately
+    # because PeriodicCallback doesn't start until the end of the first interval
+    loop.add_callback(cull)
+    # schedule periodic cull
+    pc = PeriodicCallback(cull, 1e3 * options.cull_every)
+    pc.start()
+    try:
+        loop.start()
+    except KeyboardInterrupt:
+        pass
diff --git a/jupyter-dmid/jupyter_notebook_config.py b/jupyter-dmid/jupyter_notebook_config.py
new file mode 100644
index 00000000..a25ab206
--- /dev/null
+++ b/jupyter-dmid/jupyter_notebook_config.py
@@ -0,0 +1,8 @@
+import os
+
+c.NotebookApp.tornado_settings = {
+    'headers': {
+        'Content-Security-Policy':
+            "frame-ancestors 'self' %s" % os.getenv('FRAME_ANCESTORS', '')
+    }
+}
\ No newline at end of file
diff --git a/rstudio-inflammatory/Dockerfile b/rstudio-inflammatory/Dockerfile
new file mode 100644
index 00000000..6098d2b6
--- /dev/null
+++ b/rstudio-inflammatory/Dockerfile
@@ -0,0 +1,13 @@
+FROM quay.io/cdis/rstudio:master
+
+
+# COPY premade notebook
+RUN mkdir /home/rstudio/inflammatory
+
+ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/Inflammatory_RStudio/Gen3AuthHelper.R /home/rstudio/inflammatory/
+ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/Inflammatory_RStudio/Gen3Submission.R /home/rstudio/inflammatory/
+ADD https://raw.githubusercontent.com/uc-cdis/ndh-demo/master/demo/Inflammatory_RStudio/immune_marker.R /home/rstudio/inflammatory/
+
+
+
+RUN chmod -R 777 /home/rstudio/inflammatory
\ No newline at end of file