jupyter · yuvipanda · Oct 17, 2023 · Oct 17, 2023 · Oct 17, 2023 · Oct 17, 2023
diff --git a/docs/using/common.md b/docs/using/common.md
@@ -86,7 +86,7 @@ You do so by passing arguments to the `docker run` command.
 
   ```{note}
   `NB_UMASK` when set only applies to the Jupyter process itself -
-  you cannot use it to set a `umask` for additional files created during `run-hooks.sh`.
+  you cannot use it to set a `umask` for additional files created during `run-hooks.py`.
   For example, via `pip` or `conda`.
   If you need to set a `umask` for these, you **must** set the `umask` value for each command.
   ```
@@ -135,7 +135,7 @@ or executables (`chmod +x`) to be run to the paths below:
 - `/usr/local/bin/before-notebook.d/` - handled **after** all the standard options noted above are applied
   and ran right before the Server launches
 
-See the `run-hooks.sh` script [here](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/run-hooks.sh) and how it's used in the [`start.sh`](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/start.sh)
+See the `run-hooks.py` script [here](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/run-hooks.py) and how it's used in the [`start.sh`](https://github.com/jupyter/docker-stacks/blob/main/images/docker-stacks-foundation/start.sh)
 script for execution details.
 
 ## SSL Certificates

diff --git a/docs/using/selecting.md b/docs/using/selecting.md
@@ -36,7 +36,7 @@ It contains:
   with ownership over the `/home/jovyan` and `/opt/conda` paths
 - `tini` as the container entry point
 - A `start.sh` script as the default command - useful for running alternative commands in the container as applications are added (e.g. `ipython`, `jupyter kernelgateway`, `jupyter lab`)
-- A `run-hooks.sh` script, which can source/run files in a given directory
+- A `run-hooks.py` script, which can source `.sh` files and call executable files in a given directory
 - Options for a passwordless sudo
 - Common system libraries like `bzip2`, `ca-certificates`, `locales`
 - `wget` to download external files

diff --git a/images/docker-stacks-foundation/Dockerfile b/images/docker-stacks-foundation/Dockerfile
@@ -127,7 +127,7 @@ ENTRYPOINT ["tini", "-g", "--"]
 CMD ["start.sh"]
 
 # Copy local files as late as possible to avoid cache busting
-COPY run-hooks.sh start.sh /usr/local/bin/
+COPY run-hooks.py run-hooks.sh start.sh /usr/local/bin/
 
 USER root
 

diff --git a/images/docker-stacks-foundation/run-hooks.py b/images/docker-stacks-foundation/run-hooks.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+import json
+
+# The run-hooks.py script looks for *.sh scripts to source
+# and executable files to run within a passed directory
+import os
+import subprocess
+import sys
+import tempfile
+from pathlib import PosixPath
+from textwrap import dedent
+
+
+def source(path: PosixPath) -> None:
+    """
+    Emulate the bash `source` command accurately
+
+    When used in bash, `source` executes the passed file in the current 'context'
+    of the script from where it is called. This primarily deals with how
+    bash (and thus environment variables) are modified.
+
+    1. Any bash variables (particularly any set via `export`) are passed on to the
+       sourced script as their values are at the point source is called
+    2. The sourced script can itself use `export` to affect the bash variables of the
+       parent script that called it.
+
+    (2) is the primary difference between `source` and just calling a shell script,
+    and makes it possible for a set of scripts running in sequence to share data by
+    passing bash variables across with `export`.
+
+    Given bash variables are environment variables, we will simply look for all modified
+    environment variables in the script we have sourced, and update the calling python
+    script's environment variables to match.
+
+    Args:
+        path (PosixPath): Valid bash script to source
+    """
+    # We start a bash process and have it `source` the script we are given. Then, we
+    # use python (for convenience) to dump the environment variables from the bash process into
+    # json (we could use `env` but then handling multiline variable values becomes a nightmare).
+    # The json is written to a temporary file we create. We read this json, and update our python
+    # process' environment variable with whatever we get back from bash.
+    with tempfile.NamedTemporaryFile() as bash_file, tempfile.NamedTemporaryFile() as py_file, tempfile.NamedTemporaryFile() as env_vars_file:
+        py_file.write(
+            dedent(
+                f"""
+                import os
+                import json
+                with(open("{env_vars_file.name}", "w")) as f:
+                    json.dump(dict(os.environ), f)
+                """
+            ).encode()
+        )
+        py_file.flush()
+
+        bash_file.write(
+            dedent(
+                f"""
+                #!/bin/bash
+                source {path}
+                {sys.executable} {py_file.name}
+                """
+            ).encode()
+        )
+        bash_file.flush()
+
+        run = subprocess.run(["/bin/bash", bash_file.name])
+
+        if run.returncode != 0:
+            print(
+                f"{path} has failed with return code {run.returncode}, continuing execution"
+            )
+            return
+
+        # Get env vars of the sourced process after it exits
+        # This may contain *additional* env vars, or some may be *removed*
+        child_env_vars = json.load(env_vars_file)
+
+        # Remove any env vars from our environment that were explicitly removed from the child
+        removed_env_vars = set(os.environ.keys()) - set(child_env_vars.keys())
+        for name in removed_env_vars:
+            del os.environ[name]
+
+        # Update our environment with any *new* or *modified* env vars from the child process
+        os.environ.update(child_env_vars)
+
+
+if len(sys.argv) != 2:
+    print("Should pass exactly one directory")
+    sys.exit(1)
+
+hooks_directory = PosixPath(sys.argv[1])
+
+if not hooks_directory.exists():
+    print(f"Directory {hooks_directory} does not exist")
+    sys.exit(1)
+
+if not hooks_directory.is_dir():
+    print(f"{hooks_directory} is not a directory")
+    sys.exit(1)
+
+print(f"Running hooks in: {hooks_directory} as uid: {os.getuid()} gid: {os.getgid()}")
+
+for f in sorted(hooks_directory.iterdir()):
+    if f.suffix == ".sh":
+        print(f"Sourcing shell script: {f}")
+        source(f)
+    elif os.access(f, os.X_OK):
+        print(f"Running executable: {f}")
+        run = subprocess.run([str(f)])
+        if run.returncode != 0:
+            print(
+                f"{f} has failed with return code {run.returncode}, continuing execution"
+            )
+    else:
+        print(f"Ignoring non-executable: {f}")
+
+
+print(f"Done running hooks in: {hooks_directory}")
diff --git a/images/docker-stacks-foundation/run-hooks.sh b/images/docker-stacks-foundation/run-hooks.sh
@@ -1,46 +1,6 @@
 #!/bin/bash
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
+# echo "WARNING: Use run-hooks.py instead"
 
-# The run-hooks.sh script looks for *.sh scripts to source
-# and executable files to run within a passed directory
-
-if [ "$#" -ne 1 ]; then
-    echo "Should pass exactly one directory"
-    return 1
-fi
-
-if [[ ! -d "${1}" ]] ; then
-    echo "Directory ${1} doesn't exist or is not a directory"
-    return 1
-fi
-
-echo "Running hooks in: ${1} as uid: $(id -u) gid: $(id -g)"
-for f in "${1}/"*; do
-    # Hadling a case when the directory is empty
-    [ -e "${f}" ] || continue
-    case "${f}" in
-        *.sh)
-            echo "Sourcing shell script: ${f}"
-            # shellcheck disable=SC1090
-            source "${f}"
-            # shellcheck disable=SC2181
-            if [ $? -ne 0 ] ; then
-                echo "${f} has failed, continuing execution"
-            fi
-            ;;
-        *)
-            if [ -x "${f}" ] ; then
-                echo "Running executable: ${f}"
-                "${f}"
-                # shellcheck disable=SC2181
-                if [ $? -ne 0 ] ; then
-                    echo "${f} has failed, continuing execution"
-                fi
-            else
-                echo "Ignoring non-executable: ${f}"
-            fi
-            ;;
-    esac
-done
-echo "Done running hooks in: ${1}"
+exec /usr/local/bin/run-hooks.py "$@"
diff --git a/tests/docker-stacks-foundation/test_run_hooks.py b/tests/docker-stacks-foundation/test_run_hooks.py
@@ -44,7 +44,7 @@ def test_run_hooks_missing_dir(container: TrackedContainer) -> None:
             "source /usr/local/bin/run-hooks.sh /tmp/missing-dir/",
         ],
     )
-    assert "Directory /tmp/missing-dir/ doesn't exist or is not a directory" in logs
+    assert "Directory /tmp/missing-dir does not exist" in logs
 
 
 def test_run_hooks_dir_is_file(container: TrackedContainer) -> None:
@@ -58,7 +58,7 @@ def test_run_hooks_dir_is_file(container: TrackedContainer) -> None:
             "touch /tmp/some-file && source /usr/local/bin/run-hooks.sh /tmp/some-file",
         ],
     )
-    assert "Directory /tmp/some-file doesn't exist or is not a directory" in logs
+    assert "/tmp/some-file is not a directory" in logs
 
 
 def test_run_hooks_empty_dir(container: TrackedContainer) -> None:
@@ -105,7 +105,7 @@ def test_run_hooks_executables(container: TrackedContainer) -> None:
     )
 
     assert "Executable python file was successfully run" in logs
-    assert "Ignoring non-executable: /home/jovyan/data-copy//non_executable.py" in logs
+    assert "Ignoring non-executable: /home/jovyan/data-copy/non_executable.py" in logs
     assert "SOME_VAR is 123" in logs