From c056a570ed7da5f5706672e748a2d03085e9e6d8 Mon Sep 17 00:00:00 2001 From: Charles Coggins Date: Thu, 13 Oct 2022 22:37:41 -0500 Subject: [PATCH 1/4] fix: sanitize user input to guard against possible cmd injection This change makes use of the `shlex` module to `quote` the parts of any command line that can possibly come from user supplied input. The command line is then `split` with the same module to ensure proper and sanitized tokenization when supplied to a `subprocess.run` call. The `shlex` module is only designed for Unix shells. The `shlex.quote()` function is not guaranteed to be correct on non-POSIX compliant shells or shells from other operating systems such as Windows. Therefore, the documentation and PyPI package classifiers were updated to make that operating limitation more obvious. Fixes #143 --- README.md | 4 ++-- pyproject.toml | 1 + src/phylum/ci/ci_azure.py | 3 ++- src/phylum/ci/ci_base.py | 5 +++-- src/phylum/ci/ci_github.py | 9 +++++---- src/phylum/ci/ci_gitlab.py | 7 ++++--- src/phylum/ci/ci_none.py | 5 +++-- src/phylum/ci/cli.py | 2 +- 8 files changed, 21 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 9bc12611..61f11475 100644 --- a/README.md +++ b/README.md @@ -42,8 +42,8 @@ pipx run --spec phylum phylum-init pipx run --spec phylum phylum-ci ``` -These installation methods require Python 3.7+ to run. For a self contained environment, consider using the Docker -image as described below. +These installation methods require Python 3.7+ to run. The application requires a POSIX compliant shell in which to run. +For a self contained environment, consider using the Docker image as described below. ### Usage diff --git a/pyproject.toml b/pyproject.toml index 56403f76..38646b20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ "Topic :: Security", "Topic :: Software Development", "Topic :: Software Development :: Quality Assurance", + "Operating System :: POSIX", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", diff --git a/src/phylum/ci/ci_azure.py b/src/phylum/ci/ci_azure.py index 4d9f862e..a042a279 100644 --- a/src/phylum/ci/ci_azure.py +++ b/src/phylum/ci/ci_azure.py @@ -19,6 +19,7 @@ import urllib.parse from argparse import Namespace from pathlib import Path +from shlex import quote, split from typing import Optional import requests @@ -112,7 +113,7 @@ def common_lockfile_ancestor_commit(self) -> Optional[str]: if pr_tgt_branch.startswith(old_ref_prefix): pr_tgt_branch = pr_tgt_branch.replace(old_ref_prefix, new_ref_prefix, 1) - cmd = f"git merge-base {pr_src_branch} {pr_tgt_branch}".split() + cmd = split(f"git merge-base {quote(pr_src_branch)} {quote(pr_tgt_branch)}") try: common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip() print(f" [+] Common lockfile ancestor commit: {common_ancestor_commit}") diff --git a/src/phylum/ci/ci_base.py b/src/phylum/ci/ci_base.py index 210b92a7..3d423bd5 100644 --- a/src/phylum/ci/ci_base.py +++ b/src/phylum/ci/ci_base.py @@ -14,6 +14,7 @@ from abc import ABC, abstractmethod from argparse import Namespace from pathlib import Path +from shlex import quote, split from typing import List, Optional, Tuple from packaging.version import Version @@ -272,8 +273,8 @@ def previous_lockfile_object(self) -> Optional[str]: if not self.common_lockfile_ancestor_commit: return None try: - cmd_line = f"git rev-parse --verify {self.common_lockfile_ancestor_commit}:{self.lockfile.name}".split() - prev_lockfile_object = subprocess.run(cmd_line, check=True, capture_output=True, text=True).stdout.strip() + cmd = f"git rev-parse --verify {quote(self.common_lockfile_ancestor_commit)}:{quote(self.lockfile.name)}" + prev_lockfile_object = subprocess.run(split(cmd), check=True, capture_output=True, text=True).stdout.strip() except subprocess.CalledProcessError as err: # There could be a true error, but the working assumption when here is a previous version does not exist print(f" [?] There *may* be an issue with the attempt to get the previous lockfile object: {err}") diff --git a/src/phylum/ci/ci_github.py b/src/phylum/ci/ci_github.py index 160a42f6..f9cdef7b 100644 --- a/src/phylum/ci/ci_github.py +++ b/src/phylum/ci/ci_github.py @@ -15,6 +15,7 @@ import subprocess from argparse import Namespace from pathlib import Path +from shlex import quote, split from typing import Optional import requests @@ -31,8 +32,8 @@ def __init__(self, args: Namespace) -> None: # It is added before super().__init__(args) so that lockfile change detection will be set properly. # See https://github.com/actions/checkout/issues/766 (git CVE-2022-24765) for more detail. github_workspace = os.getenv("GITHUB_WORKSPACE", "/github/workspace") - cmd = f"git config --global --add safe.directory {github_workspace}" - subprocess.run(cmd.split(), check=True) + cmd = f"git config --global --add safe.directory {quote(github_workspace)}" + subprocess.run(split(cmd), check=True) super().__init__(args) self.ci_platform_name = "GitHub Actions" @@ -115,8 +116,8 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool: try: # `--exit-code` will make git exit with 1 if there were differences while 0 means no differences. # Any other exit code is an error and a reason to re-raise. - cmd = f"git diff --exit-code --quiet {pr_base_sha} -- {lockfile.resolve()}" - subprocess.run(cmd.split(), check=True) + cmd = f"git diff --exit-code --quiet {quote(pr_base_sha)} -- {lockfile.resolve()}" + subprocess.run(split(cmd), check=True) return False except subprocess.CalledProcessError as err: if err.returncode == 1: diff --git a/src/phylum/ci/ci_gitlab.py b/src/phylum/ci/ci_gitlab.py index fdc58db3..57a3e2c4 100644 --- a/src/phylum/ci/ci_gitlab.py +++ b/src/phylum/ci/ci_gitlab.py @@ -12,6 +12,7 @@ from argparse import Namespace from functools import lru_cache from pathlib import Path +from shlex import quote, split from typing import Optional import requests @@ -125,7 +126,7 @@ def common_lockfile_ancestor_commit(self) -> Optional[str]: default_branch = os.getenv("CI_DEFAULT_BRANCH", "HEAD") # This is a best effort attempt since it is finding the merge base between the current commit # and the default branch instead of finding the exact commit from which the branch was created. - cmd = f"git merge-base HEAD refs/remotes/{remote}/{default_branch}".split() + cmd = split(f"git merge-base HEAD refs/remotes/{quote(remote)}/{quote(default_branch)}") try: common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip() except subprocess.CalledProcessError as err: @@ -146,8 +147,8 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool: try: # `--exit-code` will make git exit with 1 if there were differences while 0 means no differences. # Any other exit code is an error and a reason to re-raise. - cmd = f"git diff --exit-code --quiet {diff_base_sha} -- {lockfile.resolve()}" - subprocess.run(cmd.split(), check=True) + cmd = f"git diff --exit-code --quiet {quote(diff_base_sha)} -- {lockfile.resolve()}" + subprocess.run(split(cmd), check=True) return False except subprocess.CalledProcessError as err: if err.returncode == 1: diff --git a/src/phylum/ci/ci_none.py b/src/phylum/ci/ci_none.py index 14c838c2..6af840b0 100644 --- a/src/phylum/ci/ci_none.py +++ b/src/phylum/ci/ci_none.py @@ -7,6 +7,7 @@ import argparse import subprocess from pathlib import Path +from shlex import quote, split from typing import Optional from connect.utils.terminal.markdown import render @@ -53,7 +54,7 @@ def phylum_label(self) -> str: def common_lockfile_ancestor_commit(self) -> Optional[str]: """Find the common lockfile ancestor commit.""" remote = git_remote() - cmd = f"git merge-base HEAD refs/remotes/{remote}/HEAD".split() + cmd = split(f"git merge-base HEAD refs/remotes/{quote(remote)}/HEAD") try: common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip() except subprocess.CalledProcessError as err: @@ -75,7 +76,7 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool: https://git-scm.com/docs/git-diff#Documentation/git-diff.txt-emgitdiffemltoptionsgtltcommitgtltcommitgt--ltpathgt82308203 """ remote = git_remote() - cmd = f"git diff --exit-code --quiet refs/remotes/{remote}/HEAD... -- {lockfile.resolve()}".split() + cmd = split(f"git diff --exit-code --quiet refs/remotes/{quote(remote)}/HEAD... -- {lockfile.resolve()}") try: # `--exit-code` will make git exit with with 1 if there were differences while 0 means no differences. # Any other exit code is an error and a reason to re-raise. diff --git a/src/phylum/ci/cli.py b/src/phylum/ci/cli.py index 9d6cb60c..9231f383 100644 --- a/src/phylum/ci/cli.py +++ b/src/phylum/ci/cli.py @@ -98,7 +98,7 @@ def ensure_project(ci_env: CIBase) -> None: def get_phylum_analysis(ci_env: CIBase) -> dict: """Analyze a project lockfile from a given CI environment with the phylum CLI and return the analysis.""" # Build up the analyze command based on the provided inputs - cmd = f"{ci_env.cli_path} analyze -l {ci_env.phylum_label}" + cmd = f"{ci_env.cli_path} analyze -l {shlex.quote(ci_env.phylum_label)}" if ci_env.phylum_project: cmd = f"{cmd} --project {ci_env.phylum_project}" # A group can not be specified without a project From 6eb965f495ef02dfc4822c50a0f4fd65e22232d0 Mon Sep 17 00:00:00 2001 From: Charles Coggins Date: Fri, 14 Oct 2022 13:40:28 -0500 Subject: [PATCH 2/4] fix: sanitize user input to guard against possible cmd injection This change backs out the use of the `shlex` module to quote the parts of any command line that can possibly come from user supplied input. Explicit lists of command line arguments are used instead. Every element of each list is a single string, regardless of whitespace contained within. This is true for both static string literals and variables, whether they came from user input or not. Therefore, each element in the list will be considered a separate token/argument when supplied to a `subprocess.run` call. Every instance of `subprocess.run` was reviewed and updated to this new format. Fixes #143 --- README.md | 2 +- pyproject.toml | 1 - src/phylum/ci/ci_azure.py | 7 +++---- src/phylum/ci/ci_base.py | 26 +++++++++++++++----------- src/phylum/ci/ci_github.py | 9 ++++----- src/phylum/ci/ci_gitlab.py | 9 ++++----- src/phylum/ci/ci_none.py | 9 ++++----- src/phylum/ci/ci_precommit.py | 8 ++++---- src/phylum/ci/cli.py | 25 ++++++++++--------------- src/phylum/init/cli.py | 16 ++++++++-------- 10 files changed, 53 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 61f11475..2af7c3c7 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ pipx run --spec phylum phylum-init pipx run --spec phylum phylum-ci ``` -These installation methods require Python 3.7+ to run. The application requires a POSIX compliant shell in which to run. +These installation methods require Python 3.7+ to run. For a self contained environment, consider using the Docker image as described below. ### Usage diff --git a/pyproject.toml b/pyproject.toml index 38646b20..56403f76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,6 @@ classifiers = [ "Topic :: Security", "Topic :: Software Development", "Topic :: Software Development :: Quality Assurance", - "Operating System :: POSIX", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", diff --git a/src/phylum/ci/ci_azure.py b/src/phylum/ci/ci_azure.py index a042a279..58daee37 100644 --- a/src/phylum/ci/ci_azure.py +++ b/src/phylum/ci/ci_azure.py @@ -19,7 +19,6 @@ import urllib.parse from argparse import Namespace from pathlib import Path -from shlex import quote, split from typing import Optional import requests @@ -113,7 +112,7 @@ def common_lockfile_ancestor_commit(self) -> Optional[str]: if pr_tgt_branch.startswith(old_ref_prefix): pr_tgt_branch = pr_tgt_branch.replace(old_ref_prefix, new_ref_prefix, 1) - cmd = split(f"git merge-base {quote(pr_src_branch)} {quote(pr_tgt_branch)}") + cmd = ["git", "merge-base", pr_src_branch, pr_tgt_branch] try: common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip() print(f" [+] Common lockfile ancestor commit: {common_ancestor_commit}") @@ -141,8 +140,8 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool: try: # `--exit-code` will make git exit with 1 if there were differences while 0 means no differences. # Any other exit code is an error and a reason to re-raise. - cmd = f"git diff --exit-code --quiet {pr_base_sha} -- {lockfile.resolve()}" - subprocess.run(cmd.split(), check=True) + cmd = ["git", "diff", "--exit-code", "--quiet", pr_base_sha, "--", str(lockfile.resolve())] + subprocess.run(cmd, check=True) return False except subprocess.CalledProcessError as err: if err.returncode == 1: diff --git a/src/phylum/ci/ci_base.py b/src/phylum/ci/ci_base.py index 3d423bd5..f9b35002 100644 --- a/src/phylum/ci/ci_base.py +++ b/src/phylum/ci/ci_base.py @@ -14,7 +14,6 @@ from abc import ABC, abstractmethod from argparse import Namespace from pathlib import Path -from shlex import quote, split from typing import List, Optional, Tuple from packaging.version import Version @@ -38,8 +37,8 @@ def git_remote() -> str: This function is limited in that it will only work when there is a single remote defined. A RuntimeError exception will be raised when there is not exactly one remote. """ - cmd = "git remote" - remotes = subprocess.run(cmd.split(), check=True, text=True, capture_output=True).stdout.splitlines() + cmd = ["git", "remote"] + remotes = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.splitlines() if not remotes: raise RuntimeError("No git remotes configured") if len(remotes) > 1: @@ -257,7 +256,7 @@ def current_lockfile_packages(self) -> Packages: if not self.cli_path: raise SystemExit(" [!] Phylum CLI path is unknown. Try using the `init_cli` method first.") try: - cmd = f"{self.cli_path} parse {self.lockfile}".split() + cmd = [str(self.cli_path), "parse", str(self.lockfile)] parse_result = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip() except subprocess.CalledProcessError as err: print(f" [!] There was an error running the command: {' '.join(err.cmd)}") @@ -273,8 +272,13 @@ def previous_lockfile_object(self) -> Optional[str]: if not self.common_lockfile_ancestor_commit: return None try: - cmd = f"git rev-parse --verify {quote(self.common_lockfile_ancestor_commit)}:{quote(self.lockfile.name)}" - prev_lockfile_object = subprocess.run(split(cmd), check=True, capture_output=True, text=True).stdout.strip() + cmd = [ + "git", + "rev-parse", + "--verify", + f"{self.common_lockfile_ancestor_commit}:{self.lockfile.name}", + ] + prev_lockfile_object = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip() except subprocess.CalledProcessError as err: # There could be a true error, but the working assumption when here is a previous version does not exist print(f" [?] There *may* be an issue with the attempt to get the previous lockfile object: {err}") @@ -293,8 +297,8 @@ def get_previous_lockfile_packages(self, prev_lockfile_object: str) -> Packages: with tempfile.NamedTemporaryFile(mode="w+") as prev_lockfile_fd: try: - cmd = f"git cat-file blob {prev_lockfile_object}" - prev_lockfile_contents = subprocess.run(cmd.split(), check=True, capture_output=True, text=True).stdout + cmd = ["git", "cat-file", "blob", prev_lockfile_object] + prev_lockfile_contents = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout prev_lockfile_fd.write(prev_lockfile_contents) prev_lockfile_fd.flush() except subprocess.CalledProcessError as err: @@ -304,8 +308,8 @@ def get_previous_lockfile_packages(self, prev_lockfile_object: str) -> Packages: print(" [!] Due to error, assuming no previous lockfile packages. Please report this as a bug.") return [] try: - cmd = f"{self.cli_path} parse {prev_lockfile_fd.name}" - parse_result = subprocess.run(cmd.split(), check=True, capture_output=True, text=True).stdout.strip() + cmd = [str(self.cli_path), "parse", prev_lockfile_fd.name] + parse_result = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip() except subprocess.CalledProcessError as err: print(f" [!] There was an error running the command: {' '.join(err.cmd)}") print(f" [!] stdout:\n{err.stdout}") @@ -369,7 +373,7 @@ def init_cli(self) -> None: # Exit condition: a Phylum API key should be in place or available at this point. # Ensure stdout is piped to DEVNULL, to keep the token from being printed in (CI log) output. - cmd = f"{cli_path} auth token".split() + cmd = [str(cli_path), "auth", "token"] # pylint: disable-next=subprocess-run-check ; we want the return code here and don't want to raise when non-zero if bool(subprocess.run(cmd, stdout=subprocess.DEVNULL).returncode): raise SystemExit(" [!] A Phylum API key is required to continue.") diff --git a/src/phylum/ci/ci_github.py b/src/phylum/ci/ci_github.py index f9cdef7b..71186bd7 100644 --- a/src/phylum/ci/ci_github.py +++ b/src/phylum/ci/ci_github.py @@ -15,7 +15,6 @@ import subprocess from argparse import Namespace from pathlib import Path -from shlex import quote, split from typing import Optional import requests @@ -32,8 +31,8 @@ def __init__(self, args: Namespace) -> None: # It is added before super().__init__(args) so that lockfile change detection will be set properly. # See https://github.com/actions/checkout/issues/766 (git CVE-2022-24765) for more detail. github_workspace = os.getenv("GITHUB_WORKSPACE", "/github/workspace") - cmd = f"git config --global --add safe.directory {quote(github_workspace)}" - subprocess.run(split(cmd), check=True) + cmd = ["git", "config", "--global", "--add", "safe.directory", github_workspace] + subprocess.run(cmd, check=True) super().__init__(args) self.ci_platform_name = "GitHub Actions" @@ -116,8 +115,8 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool: try: # `--exit-code` will make git exit with 1 if there were differences while 0 means no differences. # Any other exit code is an error and a reason to re-raise. - cmd = f"git diff --exit-code --quiet {quote(pr_base_sha)} -- {lockfile.resolve()}" - subprocess.run(split(cmd), check=True) + cmd = ["git", "diff", "--exit-code", "--quiet", pr_base_sha, "--", str(lockfile.resolve())] + subprocess.run(cmd, check=True) return False except subprocess.CalledProcessError as err: if err.returncode == 1: diff --git a/src/phylum/ci/ci_gitlab.py b/src/phylum/ci/ci_gitlab.py index 57a3e2c4..b37a4c1b 100644 --- a/src/phylum/ci/ci_gitlab.py +++ b/src/phylum/ci/ci_gitlab.py @@ -12,7 +12,6 @@ from argparse import Namespace from functools import lru_cache from pathlib import Path -from shlex import quote, split from typing import Optional import requests @@ -93,7 +92,7 @@ def phylum_label(self) -> str: current_branch = os.getenv("CI_COMMIT_BRANCH", "unknown-branch") # This is the unique key that git uses to refer to the blob type data object for the lockfile. # Reference: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects - cmd = f"git hash-object {self.lockfile}".split() + cmd = ["git", "hash-object", str(self.lockfile)] lockfile_hash_object = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip() label = f"{self.ci_platform_name}_{current_branch}_{lockfile_hash_object[:7]}" @@ -126,7 +125,7 @@ def common_lockfile_ancestor_commit(self) -> Optional[str]: default_branch = os.getenv("CI_DEFAULT_BRANCH", "HEAD") # This is a best effort attempt since it is finding the merge base between the current commit # and the default branch instead of finding the exact commit from which the branch was created. - cmd = split(f"git merge-base HEAD refs/remotes/{quote(remote)}/{quote(default_branch)}") + cmd = ["git", "merge-base", "HEAD", f"refs/remotes/{remote}/{default_branch}"] try: common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip() except subprocess.CalledProcessError as err: @@ -147,8 +146,8 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool: try: # `--exit-code` will make git exit with 1 if there were differences while 0 means no differences. # Any other exit code is an error and a reason to re-raise. - cmd = f"git diff --exit-code --quiet {quote(diff_base_sha)} -- {lockfile.resolve()}" - subprocess.run(split(cmd), check=True) + cmd = ["git", "diff", "--exit-code", "--quiet", diff_base_sha, "--", str(lockfile.resolve())] + subprocess.run(cmd, check=True) return False except subprocess.CalledProcessError as err: if err.returncode == 1: diff --git a/src/phylum/ci/ci_none.py b/src/phylum/ci/ci_none.py index 6af840b0..f2fc9bdd 100644 --- a/src/phylum/ci/ci_none.py +++ b/src/phylum/ci/ci_none.py @@ -7,7 +7,6 @@ import argparse import subprocess from pathlib import Path -from shlex import quote, split from typing import Optional from connect.utils.terminal.markdown import render @@ -38,12 +37,12 @@ def _check_prerequisites(self) -> None: @property def phylum_label(self) -> str: """Get a custom label for use when submitting jobs with `phylum analyze`.""" - cmd = "git branch --show-current".split() + cmd = ["git", "branch", "--show-current"] current_branch = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip() # This is the unique key that git uses to refer to the blob type data object for the lockfile. # Reference: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects - cmd = f"git hash-object {self.lockfile}".split() + cmd = ["git", "hash-object", str(self.lockfile)] lockfile_hash_object = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip() label = f"{self.ci_platform_name}_{current_branch}_{lockfile_hash_object[:7]}" label = label.replace(" ", "-") @@ -54,7 +53,7 @@ def phylum_label(self) -> str: def common_lockfile_ancestor_commit(self) -> Optional[str]: """Find the common lockfile ancestor commit.""" remote = git_remote() - cmd = split(f"git merge-base HEAD refs/remotes/{quote(remote)}/HEAD") + cmd = ["git", "merge-base", "HEAD", f"refs/remotes/{remote}/HEAD"] try: common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip() except subprocess.CalledProcessError as err: @@ -76,7 +75,7 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool: https://git-scm.com/docs/git-diff#Documentation/git-diff.txt-emgitdiffemltoptionsgtltcommitgtltcommitgt--ltpathgt82308203 """ remote = git_remote() - cmd = split(f"git diff --exit-code --quiet refs/remotes/{quote(remote)}/HEAD... -- {lockfile.resolve()}") + cmd = ["git", "diff", "--exit-code", "--quiet", f"refs/remotes/{remote}/HEAD...", "--", str(lockfile.resolve())] try: # `--exit-code` will make git exit with with 1 if there were differences while 0 means no differences. # Any other exit code is an error and a reason to re-raise. diff --git a/src/phylum/ci/ci_precommit.py b/src/phylum/ci/ci_precommit.py index 5ea87969..4312c710 100644 --- a/src/phylum/ci/ci_precommit.py +++ b/src/phylum/ci/ci_precommit.py @@ -34,7 +34,7 @@ def _check_prerequisites(self) -> None: """ super()._check_prerequisites() - cmd = "git diff --cached --name-only".split() + cmd = ["git", "diff", "--cached", "--name-only"] staged_files = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip().split("\n") extra_arg_paths = (Path(extra_arg).resolve() for extra_arg in self.extra_args) @@ -75,12 +75,12 @@ def _check_prerequisites(self) -> None: @property def phylum_label(self) -> str: """Get a custom label for use when submitting jobs with `phylum analyze`.""" - cmd = "git branch --show-current".split() + cmd = ["git", "branch", "--show-current"] current_branch = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip() # This is the unique key that git uses to refer to the blob type data object for the lockfile. # Reference: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects - cmd = f"git hash-object {self.lockfile}".split() + cmd = ["git", "hash-object", str(self.lockfile)] lockfile_hash_object = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip() label = f"{self.ci_platform_name}_{current_branch}_{lockfile_hash_object[:7]}" label = label.replace(" ", "-") @@ -90,7 +90,7 @@ def phylum_label(self) -> str: @property def common_lockfile_ancestor_commit(self) -> Optional[str]: """Find the common lockfile ancestor commit.""" - cmd = "git rev-parse --verify HEAD".split() + cmd = ["git", "rev-parse", "--verify", "HEAD"] try: common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip() except subprocess.CalledProcessError as err: diff --git a/src/phylum/ci/cli.py b/src/phylum/ci/cli.py index 9231f383..961ee747 100644 --- a/src/phylum/ci/cli.py +++ b/src/phylum/ci/cli.py @@ -3,7 +3,6 @@ import json import os import pathlib -import shlex import subprocess import sys from typing import List, Optional, Sequence, Tuple @@ -73,16 +72,16 @@ def ensure_project(ci_env: CIBase) -> None: if not ci_env.phylum_project: return - cmd = f"{ci_env.cli_path} project create {ci_env.phylum_project}" + cmd = [str(ci_env.cli_path), "project", "create", ci_env.phylum_project] if ci_env.phylum_group: print(f" [-] Using Phylum group: {ci_env.phylum_group}") - cmd = f"{ci_env.cli_path} project create --group {ci_env.phylum_group} {ci_env.phylum_project}" + cmd = [str(ci_env.cli_path), "project", "create", "--group", ci_env.phylum_group, ci_env.phylum_project] print(f" [*] Creating a Phylum project with the name: {ci_env.phylum_project} ...") if ci_env.phylum_project_file.exists(): print(f" [+] Overwriting existing `.phylum_project` file found at: {ci_env.phylum_project_file}") - ret = subprocess.run(shlex.split(cmd), check=False) + ret = subprocess.run(cmd, check=False) # The Phylum CLI will return a unique error code of 14 when a project that already # exists is attempted to be created. This situation is recognized and allowed to happen # since it means the project exists as expected. Any other exit code is an error. @@ -91,24 +90,24 @@ def ensure_project(ci_env: CIBase) -> None: elif ret.returncode == 14: print(f" [-] Project {ci_env.phylum_project} already exists. Continuing with it ...") else: - print(f" [!] There was a problem creating the project with command: {cmd}") + print(f" [!] There was a problem creating the project with command: {' '.join(cmd)}") ret.check_returncode() def get_phylum_analysis(ci_env: CIBase) -> dict: """Analyze a project lockfile from a given CI environment with the phylum CLI and return the analysis.""" # Build up the analyze command based on the provided inputs - cmd = f"{ci_env.cli_path} analyze -l {shlex.quote(ci_env.phylum_label)}" + cmd = [str(ci_env.cli_path), "analyze", "-l", ci_env.phylum_label] if ci_env.phylum_project: - cmd = f"{cmd} --project {ci_env.phylum_project}" + cmd.extend(["--project", ci_env.phylum_project]) # A group can not be specified without a project if ci_env.phylum_group: - cmd = f"{cmd} --group {ci_env.phylum_group}" - cmd = f"{cmd} --verbose --json {ci_env.lockfile}" + cmd.extend(["--group", ci_env.phylum_group]) + cmd.extend(["--verbose", "--json", str(ci_env.lockfile)]) - print(f" [*] Performing analysis with command: {cmd} ...") + print(f" [*] Performing analysis with command: {' '.join(cmd)} ...") try: - analysis_result = subprocess.run(shlex.split(cmd), check=True, capture_output=True, text=True).stdout + analysis_result = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout except subprocess.CalledProcessError as err: # The Phylum project can set the CLI to "fail the build" if threshold requirements are not met. # This causes the return code to be non-zero and lands us here. Check for this case to proceed. @@ -183,15 +182,11 @@ def get_args(args: Optional[Sequence[str]] = None) -> Tuple[argparse.Namespace, analysis_group.add_argument( "-p", "--project", - # NOTE: The method of using the shlex module is known to be compatible with UNIX shells. - # It may not function as desired for other operating systems and/or shell types. - type=shlex.quote, help="Name of a Phylum project to create and use to perform the analysis.", ) analysis_group.add_argument( "-g", "--group", - type=shlex.quote, help="Optional group name, which will be the owner of the project. Only used when a project is also specified.", ) diff --git a/src/phylum/init/cli.py b/src/phylum/init/cli.py index d3c3ff78..9e10542d 100644 --- a/src/phylum/init/cli.py +++ b/src/phylum/init/cli.py @@ -50,8 +50,8 @@ def get_expected_phylum_bin_path(): def get_phylum_cli_version(cli_path: Path) -> str: """Get the version of the installed and active Phylum CLI and return it.""" - cmd = f"{cli_path} --version" - version = subprocess.run(cmd.split(), check=True, capture_output=True, text=True).stdout.strip().lower() + cmd = [str(cli_path), "--version"] + version = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip().lower() # Starting with Python 3.9, the str.removeprefix() method was introduced to do this same thing prefix = "phylum " @@ -269,7 +269,7 @@ def setup_token(token): # The phylum CLI settings.yaml file won't exist upon initial install # but running a command will trigger the CLI to generate it if not phylum_settings_path.exists(): - cmd = f"{phylum_bin_path} version".split() + cmd = [str(phylum_bin_path), "version"] subprocess.run(cmd, check=True) yaml = YAML() @@ -280,7 +280,7 @@ def setup_token(token): yaml.dump(settings_dict, f) # Check that the token was setup correctly by using it to display the current auth status - cmd = f"{phylum_bin_path} auth status".split() + cmd = [str(phylum_bin_path), "auth", "status"] subprocess.run(cmd, check=True) @@ -395,18 +395,18 @@ def main(args=None): # Reference: https://github.com/phylum-dev/cli/pull/671 if args.global_install: # Current assumptions for this method: - # * the /usr/local/bin directory exists, has proper permissions, is on the PATH for all users + # * the /usr/local/bin directory exists, has proper permissions, and is on the PATH for all users # * the install is on a system with glibc - cmd = "install -m 0755 phylum /usr/local/bin/phylum".split() + cmd = ["install", "-m", "0755", "phylum", "/usr/local/bin/phylum"] else: - cmd = "sh install.sh".split() + cmd = ["sh", "install.sh"] subprocess.run(cmd, check=True, cwd=extracted_dir) process_token_option(args) # Check to ensure everything is working phylum_bin_path, _ = get_phylum_bin_path() - cmd = f"{phylum_bin_path} --help".split() + cmd = [str(phylum_bin_path), "--help"] subprocess.run(cmd, check=True) return 0 From bd9036dc62d3620e1c5e8e48e1f986334048bd86 Mon Sep 17 00:00:00 2001 From: Charles Coggins Date: Mon, 17 Oct 2022 11:59:25 -0500 Subject: [PATCH 3/4] docs: add TODO reminders to use `shlex.join` The `shlex.join` method was introduced in Python 3.8 so the TODO comment reminders are tied to the issue to drop Python 3.7 support. --- src/phylum/ci/cli.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/phylum/ci/cli.py b/src/phylum/ci/cli.py index 961ee747..65b9b61e 100644 --- a/src/phylum/ci/cli.py +++ b/src/phylum/ci/cli.py @@ -90,6 +90,9 @@ def ensure_project(ci_env: CIBase) -> None: elif ret.returncode == 14: print(f" [-] Project {ci_env.phylum_project} already exists. Continuing with it ...") else: + # TODO: Use the `shlex.join` method, introduced in Python 3.8, to produce a shell-escaped string + # to protect against injection vulnerabilities (e.g., users copy/pasting the output here). + # https://github.com/phylum-dev/phylum-ci/issues/18 print(f" [!] There was a problem creating the project with command: {' '.join(cmd)}") ret.check_returncode() @@ -105,6 +108,9 @@ def get_phylum_analysis(ci_env: CIBase) -> dict: cmd.extend(["--group", ci_env.phylum_group]) cmd.extend(["--verbose", "--json", str(ci_env.lockfile)]) + # TODO: Use the `shlex.join` method, introduced in Python 3.8, to produce a shell-escaped string + # to protect against injection vulnerabilities (e.g., users copy/pasting the output here). + # https://github.com/phylum-dev/phylum-ci/issues/18 print(f" [*] Performing analysis with command: {' '.join(cmd)} ...") try: analysis_result = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout From cda8ef93c2ba65cd7291b043d4318028e26fde25 Mon Sep 17 00:00:00 2001 From: Charles Coggins Date: Mon, 17 Oct 2022 12:15:58 -0500 Subject: [PATCH 4/4] refactor: construct shell escaped command lines before printing them The `shlex.join` method turns out to be a one-liner that can easily be inlined in the current code base. --- src/phylum/ci/cli.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/phylum/ci/cli.py b/src/phylum/ci/cli.py index 65b9b61e..0e798a72 100644 --- a/src/phylum/ci/cli.py +++ b/src/phylum/ci/cli.py @@ -3,6 +3,7 @@ import json import os import pathlib +import shlex import subprocess import sys from typing import List, Optional, Sequence, Tuple @@ -90,10 +91,8 @@ def ensure_project(ci_env: CIBase) -> None: elif ret.returncode == 14: print(f" [-] Project {ci_env.phylum_project} already exists. Continuing with it ...") else: - # TODO: Use the `shlex.join` method, introduced in Python 3.8, to produce a shell-escaped string - # to protect against injection vulnerabilities (e.g., users copy/pasting the output here). - # https://github.com/phylum-dev/phylum-ci/issues/18 - print(f" [!] There was a problem creating the project with command: {' '.join(cmd)}") + shell_escaped_cmd = " ".join(shlex.quote(arg) for arg in cmd) + print(f" [!] There was a problem creating the project with command: {shell_escaped_cmd}") ret.check_returncode() @@ -108,10 +107,8 @@ def get_phylum_analysis(ci_env: CIBase) -> dict: cmd.extend(["--group", ci_env.phylum_group]) cmd.extend(["--verbose", "--json", str(ci_env.lockfile)]) - # TODO: Use the `shlex.join` method, introduced in Python 3.8, to produce a shell-escaped string - # to protect against injection vulnerabilities (e.g., users copy/pasting the output here). - # https://github.com/phylum-dev/phylum-ci/issues/18 - print(f" [*] Performing analysis with command: {' '.join(cmd)} ...") + shell_escaped_cmd = " ".join(shlex.quote(arg) for arg in cmd) + print(f" [*] Performing analysis with command: {shell_escaped_cmd}") try: analysis_result = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout except subprocess.CalledProcessError as err: