From f7aad800a66ebce9f9c14e83b065a473351f7766 Mon Sep 17 00:00:00 2001
From: Dan Nicholson <dbn@endlessos.org>
Date: Sat, 13 Apr 2024 10:56:22 -0600
Subject: [PATCH 1/4] utils: Stop filtering environment variables

The logic of it makes sense - prevent programs from accessing sensitive
environment variables. However, we have no idea whether the programs
being executed depend on those environment variables. It would be
entirely understandable if a tool to inspect a remote repository
required authentication, for example. The broad filtering being done
also makes it likely that an insensitive environment variable was
removed. There's really no way to win that game.

This also fixes an issue where GIT_AUTHOR_NAME and GIT_AUTHOR_EMAIL were
being filtered out of the commit command. This appears to be a bug in
python since the altered environment somehow escaped past its usage and
only via subprocess. In any case, removing the filtering fixes the
problem.

Fixes: #413
---
 src/lib/utils.py   | 12 ------------
 tests/test_util.py | 31 -------------------------------
 2 files changed, 43 deletions(-)

diff --git a/src/lib/utils.py b/src/lib/utils.py
index 2aefe272..8ffbb606 100644
--- a/src/lib/utils.py
+++ b/src/lib/utils.py
@@ -28,7 +28,6 @@
 import tempfile
 import urllib.request
 import urllib.parse
-import copy
 import typing as t
 from distutils.version import StrictVersion, LooseVersion
 import asyncio
@@ -272,15 +271,6 @@ def filter_versions(
     )
 
 
-def clear_env(environ):
-    new_env = copy.deepcopy(environ)
-    for varname in new_env.keys():
-        if any(i in varname.lower() for i in ["pass", "token", "secret", "auth"]):
-            log.debug("Removing env %s", varname)
-            new_env.pop(varname)
-    return new_env
-
-
 def wrap_in_bwrap(cmdline, bwrap_args=None):
     bwrap_cmd = ["bwrap", "--unshare-all", "--dev", "/dev"]
     for path in ("/usr", "/lib", "/lib64", "/bin", "/proc"):
@@ -370,7 +360,6 @@ async def run(self, input_data: t.Optional[bytes] = None) -> t.Tuple[bytes, byte
             stdin=self.stdin,
             stdout=self.stdout,
             stderr=self.stderr,
-            env=clear_env(os.environ),
         )
         try:
             stdout, stderr = await asyncio.wait_for(
@@ -404,7 +393,6 @@ def run_sync(self, input_data: t.Optional[bytes] = None) -> t.Tuple[bytes, bytes
             stdout=self.stdout,
             stderr=self.stderr,
             timeout=self.timeout,
-            env=clear_env(os.environ),
             check=False,
         )
         proc.check_returncode()
diff --git a/tests/test_util.py b/tests/test_util.py
index f3cf8cc0..d9399083 100755
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -19,7 +19,6 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 import unittest
 import subprocess
-import os
 from datetime import datetime, timezone
 from time import perf_counter
 from contextlib import contextmanager
@@ -80,18 +79,6 @@ def test_preserve_auth(self):
 
 
 class TestCommand(unittest.IsolatedAsyncioTestCase):
-    _PUBLIC_ENV_VAR = "NOTHING_SPECIAL_HERE"
-    _SECRET_ENV_VAR = "SOME_TOKEN_HERE"
-    _TEST_ENV_VALUE = "leaked"
-
-    def setUp(self):
-        os.environ[self._PUBLIC_ENV_VAR] = self._TEST_ENV_VALUE
-        os.environ[self._SECRET_ENV_VAR] = self._TEST_ENV_VALUE
-
-    def tearDown(self):
-        del os.environ[self._PUBLIC_ENV_VAR]
-        del os.environ[self._SECRET_ENV_VAR]
-
     @contextmanager
     def _assert_timeout(self, timeout: float):
         start_time = perf_counter()
@@ -99,24 +86,6 @@ def _assert_timeout(self, timeout: float):
         elapsed = perf_counter() - start_time
         self.assertLess(elapsed, timeout)
 
-    def test_clear_env(self):
-        cmd = Command(["printenv", self._PUBLIC_ENV_VAR])
-        stdout = cmd.run_sync()[0].decode().strip()
-        self.assertEqual(stdout, self._TEST_ENV_VALUE)
-
-        cmd = Command(["printenv", self._SECRET_ENV_VAR])
-        with self.assertRaises(subprocess.CalledProcessError):
-            cmd.run_sync()
-
-    async def test_clear_env_async(self):
-        cmd = Command(["printenv", self._PUBLIC_ENV_VAR])
-        stdout = (await cmd.run())[0].decode().strip()
-        self.assertEqual(stdout, self._TEST_ENV_VALUE)
-
-        cmd = Command(["printenv", self._SECRET_ENV_VAR])
-        with self.assertRaises(subprocess.CalledProcessError):
-            await cmd.run()
-
     def test_timeout(self):
         cmd = Command(["sleep", "1"], timeout=0.2)
         with self._assert_timeout(0.5):

From bc027a4b3807194ab1f449254ca924d454c19d54 Mon Sep 17 00:00:00 2001
From: Dan Nicholson <dbn@endlessos.org>
Date: Mon, 15 Apr 2024 09:18:38 -0600
Subject: [PATCH 2/4] tests: Clear git commit environment variables

In order to test the data added to git commits, clear the associated
environment variables so that the test fully manages inputs.
---
 tests/test_main.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/test_main.py b/tests/test_main.py
index 26129311..f45a61fb 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -3,6 +3,7 @@
 import shutil
 import tempfile
 import unittest
+from unittest.mock import patch
 
 from src import main
 
@@ -15,8 +16,10 @@
 )
 
 
+@patch.dict(os.environ)
 class TestEntrypoint(unittest.IsolatedAsyncioTestCase):
     def setUp(self):
+        self._clear_environment()
         self.test_dir = tempfile.TemporaryDirectory()
         self.manifest_filename = os.path.basename(TEST_MANIFEST)
         self.appdata_filename = os.path.basename(TEST_APPDATA)
@@ -34,6 +37,17 @@ def setUp(self):
     def tearDown(self):
         self.test_dir.cleanup()
 
+    def _clear_environment(self):
+        unwanted_vars = [
+            "EMAIL",
+            "GIT_AUTHOR_NAME",
+            "GIT_AUTHOR_EMAIL",
+            "GIT_COMMITTER_NAME",
+            "GIT_COMMITTER_EMAIL",
+        ]
+        for var in unwanted_vars:
+            os.environ.pop(var, None)
+
     def _run_cmd(self, cmd):
         return subprocess.run(cmd, cwd=self.test_dir.name, check=True)
 

From f82612bc3df6365f93476f2989c6237110787098 Mon Sep 17 00:00:00 2001
From: Dan Nicholson <dbn@endlessos.org>
Date: Mon, 15 Apr 2024 09:38:26 -0600
Subject: [PATCH 3/4] tests: Check git commit data

Test that the git commit was made with the correct values in the
metadata. Primarily this is useful to make sure that commit authorship
is being done correctly.
---
 tests/test_main.py | 48 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index f45a61fb..b74b64b3 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -48,15 +48,59 @@ def _clear_environment(self):
         for var in unwanted_vars:
             os.environ.pop(var, None)
 
-    def _run_cmd(self, cmd):
-        return subprocess.run(cmd, cwd=self.test_dir.name, check=True)
+    def _run_cmd(self, cmd, **kwargs):
+        return subprocess.run(cmd, cwd=self.test_dir.name, check=True, **kwargs)
+
+    def _get_commit_data(self, rev="HEAD"):
+        data = {}
+        for name, fmt in [
+            ("commit", "%H"),
+            ("subject", "%s"),
+            ("body", "%b"),
+            ("author_name", "%an"),
+            ("author_email", "%ae"),
+            ("committer_name", "%cn"),
+            ("committer_email", "%ce"),
+        ]:
+            cmd = ["git", "show", "--no-patch", f"--pretty=format:{fmt}", rev]
+            proc = self._run_cmd(cmd, stdout=subprocess.PIPE)
+            output = proc.stdout.decode("utf-8")
+            data[name] = output
+
+        return data
 
     async def test_full_run(self):
         args1 = main.parse_cli_args(["--update", "--commit-only", self.manifest_path])
         self.assertEqual(await main.run_with_args(args1), (2, 0, True))
+
+        commit_data = self._get_commit_data()
+        self.assertEqual(commit_data["subject"], "Update 2 modules")
+        self.assertEqual(commit_data["author_name"], "Test Runner")
+        self.assertEqual(commit_data["author_email"], "test@localhost")
+        self.assertEqual(commit_data["committer_name"], "Test Runner")
+        self.assertEqual(commit_data["committer_email"], "test@localhost")
+
+        body_lines = commit_data["body"].splitlines()
+        self.assertEqual(len(body_lines), 2)
+        self.assertRegex(body_lines[0], r"^Update libXaw-1.0.12.tar.bz2 to ")
+        self.assertRegex(body_lines[1], r"^Update xterm-snapshots.git to ")
+
         args2 = main.parse_cli_args([self.manifest_path])
         self.assertEqual(await main.run_with_args(args2), (0, 0, False))
 
+    async def test_git_envvars(self):
+        os.environ["GIT_AUTHOR_NAME"] = "Some Guy"
+        os.environ["GIT_AUTHOR_EMAIL"] = "someguy@localhost"
+        args1 = main.parse_cli_args(["--update", "--commit-only", self.manifest_path])
+        self.assertEqual(await main.run_with_args(args1), (2, 0, True))
+
+        commit_data = self._get_commit_data()
+        self.assertEqual(commit_data["subject"], "Update 2 modules")
+        self.assertEqual(commit_data["author_name"], "Some Guy")
+        self.assertEqual(commit_data["author_email"], "someguy@localhost")
+        self.assertEqual(commit_data["committer_name"], "Test Runner")
+        self.assertEqual(commit_data["committer_email"], "test@localhost")
+
 
 class TestForceForkTristate(unittest.TestCase):
     def test_neither_fork_arg(self):

From 609991ca6cc7ad0b82eaa347ca228d46bfca9044 Mon Sep 17 00:00:00 2001
From: Dan Nicholson <dbn@endlessos.org>
Date: Mon, 15 Apr 2024 09:44:20 -0600
Subject: [PATCH 4/4] README: Remove suggestion to set EMAIL environment
 variable

It has no effect if GIT_AUTHOR_EMAIL is being processed correctly.
---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 7004da81..c76d2bd5 100644
--- a/README.md
+++ b/README.md
@@ -114,7 +114,6 @@ jobs:
           # email sets "github-actions[bot]" as commit author, see https://github.community/t/github-actions-bot-email-address/17204/6
           GIT_AUTHOR_EMAIL: 41898282+github-actions[bot]@users.noreply.github.com
           GIT_COMMITTER_EMAIL: 41898282+github-actions[bot]@users.noreply.github.com
-          EMAIL: 41898282+github-actions[bot]@users.noreply.github.com
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
           args: --update --never-fork $PATH_TO_MANIFEST # e.g. com.organization.myapp.json