diff --git a/docs/using/common.md b/docs/using/common.md
index 34f09e6ac9..95eb7d075b 100644
--- a/docs/using/common.md
+++ b/docs/using/common.md
@@ -91,7 +91,7 @@ You do so by passing arguments to the `docker run` command.
 
   ```{note}
   `NB_UMASK` when set only applies to the Jupyter process itself -
-  you cannot use it to set a `umask` for additional files created during `run-hooks.sh`.
+  you cannot use it to set a `umask` for additional files created during `run-hooks.py`.
   For example, via `pip` or `conda`.
   If you need to set a `umask` for these, you **must** set the `umask` value for each command.
   ```
@@ -140,7 +140,7 @@ or executables (`chmod +x`) to be run to the paths below:
 - `/usr/local/bin/before-notebook.d/` - handled **after** all the standard options noted above are applied
   and ran right before the Server launches
 
-See the `run-hooks.sh` script [here](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/run-hooks.sh) and how it's used in the [`start.sh`](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/start.sh)
+See the `run-hooks.py` script [here](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/run-hooks.py) and how it's used in the [`start.sh`](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/start.sh)
 script for execution details.
 
 ## SSL Certificates
diff --git a/docs/using/selecting.md b/docs/using/selecting.md
index 89e983e199..b2347bfdd8 100644
--- a/docs/using/selecting.md
+++ b/docs/using/selecting.md
@@ -36,7 +36,7 @@ It contains:
   with ownership over the `/home/jovyan` and `/opt/conda` paths
 - `tini` as the container entry point
 - A `start.sh` script as the default command - useful for running alternative commands in the container as applications are added (e.g. `ipython`, `jupyter kernelgateway`, `jupyter lab`)
-- A `run-hooks.sh` script, which can source/run files in a given directory
+- A `run-hooks.py` script, which can source `.sh` files and call executable files in a given directory
 - Options for a passwordless sudo
 - Common system libraries like `bzip2`, `ca-certificates`, `locales`
 - `wget` to download external files
diff --git a/images/docker-stacks-foundation/Dockerfile b/images/docker-stacks-foundation/Dockerfile
index e267f5b929..a1d2153800 100644
--- a/images/docker-stacks-foundation/Dockerfile
+++ b/images/docker-stacks-foundation/Dockerfile
@@ -127,7 +127,7 @@ ENTRYPOINT ["tini", "-g", "--"]
 CMD ["start.sh"]
 
 # Copy local files as late as possible to avoid cache busting
-COPY run-hooks.sh start.sh /usr/local/bin/
+COPY run-hooks.py run-hooks.sh start.sh /usr/local/bin/
 
 USER root
 
diff --git a/images/docker-stacks-foundation/run-hooks.py b/images/docker-stacks-foundation/run-hooks.py
new file mode 100755
index 0000000000..93ff490ae4
--- /dev/null
+++ b/images/docker-stacks-foundation/run-hooks.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+# The run-hooks.py script looks for *.sh scripts to source
+# and executable files to run within a passed directory
+
+import json
+import os
+import shlex
+import subprocess
+import sys
+import tempfile
+from pathlib import PosixPath
+from textwrap import dedent
+
+
+def source(path: PosixPath) -> None:
+    """
+    Emulate the bash `source` command accurately
+
+    When used in bash, `source` executes the passed file in the current 'context'
+    of the script from where it is called. This primarily deals with how
+    bash (and thus environment variables) are modified.
+
+    1. Any bash variables (particularly any set via `export`) are passed on to the
+       sourced script as their values are at the point source is called
+    2. The sourced script can itself use `export` to affect the bash variables of the
+       parent script that called it.
+
+    (2) is the primary difference between `source` and just calling a shell script,
+    and makes it possible for a set of scripts running in sequence to share data by
+    passing bash variables across with `export`.
+
+    Given bash variables are environment variables, we will simply look for all modified
+    environment variables in the script we have sourced, and update the calling python
+    script's environment variables to match.
+
+    If the sourced script fails, or exits before its environment could be captured,
+    a message is printed and the environment of this process is left untouched.
+
+    Args:
+        path (PosixPath): Valid bash script to source
+    """
+    # We start a bash process and have it `source` the script we are given. Then, we
+    # use python (for convenience) to dump the environment variables from the bash process into
+    # json (we could use `env` but then handling multiline variable values becomes a nightmare).
+    # The json is written to a file in a temporary directory we create. We read this json,
+    # and update our python process' environment variables with whatever we get back from bash.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        env_vars_file = PosixPath(tmp_dir) / "env.json"
+        py_file = PosixPath(tmp_dir) / "dump_env.py"
+        bash_file = PosixPath(tmp_dir) / "source_hook.sh"
+
+        py_file.write_text(
+            dedent(
+                f"""
+                import os
+                import json
+                with open({str(env_vars_file)!r}, "w") as f:
+                    json.dump(dict(os.environ), f)
+                """
+            )
+        )
+
+        # Every interpolated path is shell-quoted, so hooks whose names contain spaces
+        # or shell metacharacters are sourced verbatim.
+        # `&&` makes bash exit with the status of `source` when the hook fails; without it,
+        # the exit status would always be that of the environment dump.
+        bash_file.write_text(
+            "#!/bin/bash\n"
+            f"source {shlex.quote(str(path))} && "
+            f"{shlex.quote(sys.executable)} {shlex.quote(str(py_file))}\n"
+        )
+
+        run = subprocess.run(["/bin/bash", str(bash_file)])
+
+        if run.returncode != 0:
+            print(
+                f"{path} has failed with return code {run.returncode}, continuing execution"
+            )
+            return
+
+        # A hook calling `exit 0` terminates bash before the environment is dumped
+        if not env_vars_file.exists():
+            print(
+                f"{path} has exited before its environment could be captured, continuing execution"
+            )
+            return
+
+        # Get env vars of the sourced process after it exits
+        # This may contain *additional* env vars, or some may be *removed*
+        child_env_vars = json.loads(env_vars_file.read_text())
+
+    # Remove any env vars from our environment that were explicitly removed from the child
+    for name in set(os.environ) - set(child_env_vars):
+        del os.environ[name]
+
+    # Update our environment with any *new* or *modified* env vars from the child process
+    os.environ.update(child_env_vars)
+
+
+def main() -> None:
+    """Source every `*.sh` file and run every other executable in the passed directory"""
+    if len(sys.argv) != 2:
+        print("Should pass exactly one directory")
+        sys.exit(1)
+
+    hooks_directory = PosixPath(sys.argv[1])
+
+    if not hooks_directory.exists():
+        print(f"Directory {hooks_directory} does not exist")
+        sys.exit(1)
+
+    if not hooks_directory.is_dir():
+        print(f"{hooks_directory} is not a directory")
+        sys.exit(1)
+
+    # Flush our own messages before starting a child process: children write to the same
+    # stdout directly, so buffered messages would otherwise show up after their output
+    print(
+        f"Running hooks in: {hooks_directory} as uid: {os.getuid()} gid: {os.getgid()}",
+        flush=True,
+    )
+
+    for f in sorted(hooks_directory.iterdir()):
+        if f.suffix == ".sh":
+            print(f"Sourcing shell script: {f}", flush=True)
+            source(f)
+        elif os.access(f, os.X_OK):
+            print(f"Running executable: {f}", flush=True)
+            try:
+                run = subprocess.run([str(f)])
+            except OSError as e:
+                # E.g. a directory, or a file the kernel cannot execute
+                print(f"{f} could not be run ({e}), continuing execution")
+                continue
+            if run.returncode != 0:
+                print(
+                    f"{f} has failed with return code {run.returncode}, continuing execution"
+                )
+        else:
+            print(f"Ignoring non-executable: {f}")
+
+    print(f"Done running hooks in: {hooks_directory}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/images/docker-stacks-foundation/run-hooks.sh b/images/docker-stacks-foundation/run-hooks.sh
index 15df23c35e..6e760e4907 100755
--- a/images/docker-stacks-foundation/run-hooks.sh
+++ b/images/docker-stacks-foundation/run-hooks.sh
@@ -1,46 +1,6 @@
 #!/bin/bash
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
+# echo "WARNING: Use run-hooks.py instead"
 
-# The run-hooks.sh script looks for *.sh scripts to source
-# and executable files to run within a passed directory
-
-if [ "$#" -ne 1 ]; then
-    echo "Should pass exactly one directory"
-    return 1
-fi
-
-if [[ ! -d "${1}" ]]; then
-    echo "Directory ${1} doesn't exist or is not a directory"
-    return 1
-fi
-
-echo "Running hooks in: ${1} as uid: $(id -u) gid: $(id -g)"
-for f in "${1}/"*; do
-    # Handling a case when the directory is empty
-    [ -e "${f}" ] || continue
-    case "${f}" in
-        *.sh)
-            echo "Sourcing shell script: ${f}"
-            # shellcheck disable=SC1090
-            source "${f}"
-            # shellcheck disable=SC2181
-            if [ $? -ne 0 ]; then
-                echo "${f} has failed, continuing execution"
-            fi
-            ;;
-        *)
-            if [ -x "${f}" ]; then
-                echo "Running executable: ${f}"
-                "${f}"
-                # shellcheck disable=SC2181
-                if [ $? -ne 0 ]; then
-                    echo "${f} has failed, continuing execution"
-                fi
-            else
-                echo "Ignoring non-executable: ${f}"
-            fi
-            ;;
-    esac
-done
-echo "Done running hooks in: ${1}"
+/usr/local/bin/run-hooks.py "$@"
diff --git a/tests/docker-stacks-foundation/test_run_hooks.py b/tests/docker-stacks-foundation/test_run_hooks.py
index 87467f9fb3..ebd4d71418 100644
--- a/tests/docker-stacks-foundation/test_run_hooks.py
+++ b/tests/docker-stacks-foundation/test_run_hooks.py
@@ -44,7 +44,7 @@ def test_run_hooks_missing_dir(container: TrackedContainer) -> None:
             "source /usr/local/bin/run-hooks.sh /tmp/missing-dir/",
         ],
     )
-    assert "Directory /tmp/missing-dir/ doesn't exist or is not a directory" in logs
+    assert "Directory /tmp/missing-dir does not exist" in logs
 
 
 def test_run_hooks_dir_is_file(container: TrackedContainer) -> None:
@@ -58,7 +58,7 @@ def test_run_hooks_dir_is_file(container: TrackedContainer) -> None:
             "touch /tmp/some-file && source /usr/local/bin/run-hooks.sh /tmp/some-file",
         ],
     )
-    assert "Directory /tmp/some-file doesn't exist or is not a directory" in logs
+    assert "/tmp/some-file is not a directory" in logs
 
 
 def test_run_hooks_empty_dir(container: TrackedContainer) -> None:
@@ -105,7 +105,7 @@ def test_run_hooks_executables(container: TrackedContainer) -> None:
     )
 
     assert "Executable python file was successfully run" in logs
-    assert "Ignoring non-executable: /home/jovyan/data-copy//non_executable.py" in logs
+    assert "Ignoring non-executable: /home/jovyan/data-copy/non_executable.py" in logs
     assert "SOME_VAR is 123" in logs
 
 