Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: sanitize user input to guard against possible cmd injection #144

Merged
merged 5 commits into from
Oct 17, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ pipx run --spec phylum phylum-init <options>
pipx run --spec phylum phylum-ci <options>
```

These installation methods require Python 3.7+ to run. For a self contained environment, consider using the Docker
image as described below.
These installation methods require Python 3.7+ to run.
For a self contained environment, consider using the Docker image as described below.

### Usage

Expand Down
6 changes: 3 additions & 3 deletions src/phylum/ci/ci_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def common_lockfile_ancestor_commit(self) -> Optional[str]:
if pr_tgt_branch.startswith(old_ref_prefix):
pr_tgt_branch = pr_tgt_branch.replace(old_ref_prefix, new_ref_prefix, 1)

cmd = f"git merge-base {pr_src_branch} {pr_tgt_branch}".split()
cmd = ["git", "merge-base", pr_src_branch, pr_tgt_branch]
try:
common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip()
print(f" [+] Common lockfile ancestor commit: {common_ancestor_commit}")
Expand Down Expand Up @@ -140,8 +140,8 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool:
try:
# `--exit-code` will make git exit with 1 if there were differences while 0 means no differences.
# Any other exit code is an error and a reason to re-raise.
cmd = f"git diff --exit-code --quiet {pr_base_sha} -- {lockfile.resolve()}"
subprocess.run(cmd.split(), check=True)
cmd = ["git", "diff", "--exit-code", "--quiet", pr_base_sha, "--", str(lockfile.resolve())]
subprocess.run(cmd, check=True)
return False
except subprocess.CalledProcessError as err:
if err.returncode == 1:
Expand Down
25 changes: 15 additions & 10 deletions src/phylum/ci/ci_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def git_remote() -> str:
This function is limited in that it will only work when there is a single remote defined.
A RuntimeError exception will be raised when there is not exactly one remote.
"""
cmd = "git remote"
remotes = subprocess.run(cmd.split(), check=True, text=True, capture_output=True).stdout.splitlines()
cmd = ["git", "remote"]
remotes = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.splitlines()
if not remotes:
raise RuntimeError("No git remotes configured")
if len(remotes) > 1:
Expand Down Expand Up @@ -256,7 +256,7 @@ def current_lockfile_packages(self) -> Packages:
if not self.cli_path:
raise SystemExit(" [!] Phylum CLI path is unknown. Try using the `init_cli` method first.")
try:
cmd = f"{self.cli_path} parse {self.lockfile}".split()
cmd = [str(self.cli_path), "parse", str(self.lockfile)]
parse_result = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip()
except subprocess.CalledProcessError as err:
print(f" [!] There was an error running the command: {' '.join(err.cmd)}")
Expand All @@ -272,8 +272,13 @@ def previous_lockfile_object(self) -> Optional[str]:
if not self.common_lockfile_ancestor_commit:
return None
try:
cmd_line = f"git rev-parse --verify {self.common_lockfile_ancestor_commit}:{self.lockfile.name}".split()
prev_lockfile_object = subprocess.run(cmd_line, check=True, capture_output=True, text=True).stdout.strip()
cmd = [
"git",
"rev-parse",
"--verify",
f"{self.common_lockfile_ancestor_commit}:{self.lockfile.name}",
]
prev_lockfile_object = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip()
except subprocess.CalledProcessError as err:
# There could be a true error, but the working assumption when here is a previous version does not exist
print(f" [?] There *may* be an issue with the attempt to get the previous lockfile object: {err}")
Expand All @@ -292,8 +297,8 @@ def get_previous_lockfile_packages(self, prev_lockfile_object: str) -> Packages:

with tempfile.NamedTemporaryFile(mode="w+") as prev_lockfile_fd:
try:
cmd = f"git cat-file blob {prev_lockfile_object}"
prev_lockfile_contents = subprocess.run(cmd.split(), check=True, capture_output=True, text=True).stdout
cmd = ["git", "cat-file", "blob", prev_lockfile_object]
prev_lockfile_contents = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout
prev_lockfile_fd.write(prev_lockfile_contents)
prev_lockfile_fd.flush()
except subprocess.CalledProcessError as err:
Expand All @@ -303,8 +308,8 @@ def get_previous_lockfile_packages(self, prev_lockfile_object: str) -> Packages:
print(" [!] Due to error, assuming no previous lockfile packages. Please report this as a bug.")
return []
try:
cmd = f"{self.cli_path} parse {prev_lockfile_fd.name}"
parse_result = subprocess.run(cmd.split(), check=True, capture_output=True, text=True).stdout.strip()
cmd = [str(self.cli_path), "parse", prev_lockfile_fd.name]
parse_result = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip()
except subprocess.CalledProcessError as err:
print(f" [!] There was an error running the command: {' '.join(err.cmd)}")
print(f" [!] stdout:\n{err.stdout}")
Expand Down Expand Up @@ -368,7 +373,7 @@ def init_cli(self) -> None:

# Exit condition: a Phylum API key should be in place or available at this point.
# Ensure stdout is piped to DEVNULL, to keep the token from being printed in (CI log) output.
cmd = f"{cli_path} auth token".split()
cmd = [str(cli_path), "auth", "token"]
# pylint: disable-next=subprocess-run-check ; we want the return code here and don't want to raise when non-zero
if bool(subprocess.run(cmd, stdout=subprocess.DEVNULL).returncode):
raise SystemExit(" [!] A Phylum API key is required to continue.")
Expand Down
8 changes: 4 additions & 4 deletions src/phylum/ci/ci_github.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def __init__(self, args: Namespace) -> None:
# It is added before super().__init__(args) so that lockfile change detection will be set properly.
# See https://github.com/actions/checkout/issues/766 (git CVE-2022-24765) for more detail.
github_workspace = os.getenv("GITHUB_WORKSPACE", "/github/workspace")
cmd = f"git config --global --add safe.directory {github_workspace}"
subprocess.run(cmd.split(), check=True)
cmd = ["git", "config", "--global", "--add", "safe.directory", github_workspace]
subprocess.run(cmd, check=True)

super().__init__(args)
self.ci_platform_name = "GitHub Actions"
Expand Down Expand Up @@ -115,8 +115,8 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool:
try:
# `--exit-code` will make git exit with 1 if there were differences while 0 means no differences.
# Any other exit code is an error and a reason to re-raise.
cmd = f"git diff --exit-code --quiet {pr_base_sha} -- {lockfile.resolve()}"
subprocess.run(cmd.split(), check=True)
cmd = ["git", "diff", "--exit-code", "--quiet", pr_base_sha, "--", str(lockfile.resolve())]
subprocess.run(cmd, check=True)
return False
except subprocess.CalledProcessError as err:
if err.returncode == 1:
Expand Down
8 changes: 4 additions & 4 deletions src/phylum/ci/ci_gitlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def phylum_label(self) -> str:
current_branch = os.getenv("CI_COMMIT_BRANCH", "unknown-branch")
# This is the unique key that git uses to refer to the blob type data object for the lockfile.
# Reference: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects
cmd = f"git hash-object {self.lockfile}".split()
cmd = ["git", "hash-object", str(self.lockfile)]
lockfile_hash_object = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip()
label = f"{self.ci_platform_name}_{current_branch}_{lockfile_hash_object[:7]}"

Expand Down Expand Up @@ -125,7 +125,7 @@ def common_lockfile_ancestor_commit(self) -> Optional[str]:
default_branch = os.getenv("CI_DEFAULT_BRANCH", "HEAD")
# This is a best effort attempt since it is finding the merge base between the current commit
# and the default branch instead of finding the exact commit from which the branch was created.
cmd = f"git merge-base HEAD refs/remotes/{remote}/{default_branch}".split()
cmd = ["git", "merge-base", "HEAD", f"refs/remotes/{remote}/{default_branch}"]
try:
common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip()
except subprocess.CalledProcessError as err:
Expand All @@ -146,8 +146,8 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool:
try:
# `--exit-code` will make git exit with 1 if there were differences while 0 means no differences.
# Any other exit code is an error and a reason to re-raise.
cmd = f"git diff --exit-code --quiet {diff_base_sha} -- {lockfile.resolve()}"
subprocess.run(cmd.split(), check=True)
cmd = ["git", "diff", "--exit-code", "--quiet", diff_base_sha, "--", str(lockfile.resolve())]
subprocess.run(cmd, check=True)
return False
except subprocess.CalledProcessError as err:
if err.returncode == 1:
Expand Down
8 changes: 4 additions & 4 deletions src/phylum/ci/ci_none.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ def _check_prerequisites(self) -> None:
@property
def phylum_label(self) -> str:
"""Get a custom label for use when submitting jobs with `phylum analyze`."""
cmd = "git branch --show-current".split()
cmd = ["git", "branch", "--show-current"]
current_branch = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip()

# This is the unique key that git uses to refer to the blob type data object for the lockfile.
# Reference: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects
cmd = f"git hash-object {self.lockfile}".split()
cmd = ["git", "hash-object", str(self.lockfile)]
lockfile_hash_object = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip()
label = f"{self.ci_platform_name}_{current_branch}_{lockfile_hash_object[:7]}"
label = label.replace(" ", "-")
Expand All @@ -53,7 +53,7 @@ def phylum_label(self) -> str:
def common_lockfile_ancestor_commit(self) -> Optional[str]:
"""Find the common lockfile ancestor commit."""
remote = git_remote()
cmd = f"git merge-base HEAD refs/remotes/{remote}/HEAD".split()
cmd = ["git", "merge-base", "HEAD", f"refs/remotes/{remote}/HEAD"]
try:
common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip()
except subprocess.CalledProcessError as err:
Expand All @@ -75,7 +75,7 @@ def _is_lockfile_changed(self, lockfile: Path) -> bool:
https://git-scm.com/docs/git-diff#Documentation/git-diff.txt-emgitdiffemltoptionsgtltcommitgtltcommitgt--ltpathgt82308203
"""
remote = git_remote()
cmd = f"git diff --exit-code --quiet refs/remotes/{remote}/HEAD... -- {lockfile.resolve()}".split()
cmd = ["git", "diff", "--exit-code", "--quiet", f"refs/remotes/{remote}/HEAD...", "--", str(lockfile.resolve())]
try:
# `--exit-code` will make git exit with with 1 if there were differences while 0 means no differences.
# Any other exit code is an error and a reason to re-raise.
Expand Down
8 changes: 4 additions & 4 deletions src/phylum/ci/ci_precommit.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def _check_prerequisites(self) -> None:
"""
super()._check_prerequisites()

cmd = "git diff --cached --name-only".split()
cmd = ["git", "diff", "--cached", "--name-only"]
staged_files = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip().split("\n")
extra_arg_paths = (Path(extra_arg).resolve() for extra_arg in self.extra_args)

Expand Down Expand Up @@ -75,12 +75,12 @@ def _check_prerequisites(self) -> None:
@property
def phylum_label(self) -> str:
"""Get a custom label for use when submitting jobs with `phylum analyze`."""
cmd = "git branch --show-current".split()
cmd = ["git", "branch", "--show-current"]
current_branch = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip()

# This is the unique key that git uses to refer to the blob type data object for the lockfile.
# Reference: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects
cmd = f"git hash-object {self.lockfile}".split()
cmd = ["git", "hash-object", str(self.lockfile)]
lockfile_hash_object = subprocess.run(cmd, check=True, text=True, capture_output=True).stdout.strip()
label = f"{self.ci_platform_name}_{current_branch}_{lockfile_hash_object[:7]}"
label = label.replace(" ", "-")
Expand All @@ -90,7 +90,7 @@ def phylum_label(self) -> str:
@property
def common_lockfile_ancestor_commit(self) -> Optional[str]:
"""Find the common lockfile ancestor commit."""
cmd = "git rev-parse --verify HEAD".split()
cmd = ["git", "rev-parse", "--verify", "HEAD"]
try:
common_ancestor_commit = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip()
except subprocess.CalledProcessError as err:
Expand Down
31 changes: 16 additions & 15 deletions src/phylum/ci/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import json
import os
import pathlib
import shlex
import subprocess
import sys
from typing import List, Optional, Sequence, Tuple
Expand Down Expand Up @@ -73,16 +72,16 @@ def ensure_project(ci_env: CIBase) -> None:
if not ci_env.phylum_project:
return

cmd = f"{ci_env.cli_path} project create {ci_env.phylum_project}"
cmd = [str(ci_env.cli_path), "project", "create", ci_env.phylum_project]
if ci_env.phylum_group:
print(f" [-] Using Phylum group: {ci_env.phylum_group}")
cmd = f"{ci_env.cli_path} project create --group {ci_env.phylum_group} {ci_env.phylum_project}"
cmd = [str(ci_env.cli_path), "project", "create", "--group", ci_env.phylum_group, ci_env.phylum_project]

print(f" [*] Creating a Phylum project with the name: {ci_env.phylum_project} ...")
if ci_env.phylum_project_file.exists():
print(f" [+] Overwriting existing `.phylum_project` file found at: {ci_env.phylum_project_file}")

ret = subprocess.run(shlex.split(cmd), check=False)
ret = subprocess.run(cmd, check=False)
# The Phylum CLI will return a unique error code of 14 when a project that already
# exists is attempted to be created. This situation is recognized and allowed to happen
# since it means the project exists as expected. Any other exit code is an error.
Expand All @@ -91,24 +90,30 @@ def ensure_project(ci_env: CIBase) -> None:
elif ret.returncode == 14:
print(f" [-] Project {ci_env.phylum_project} already exists. Continuing with it ...")
else:
print(f" [!] There was a problem creating the project with command: {cmd}")
# TODO: Use the `shlex.join` method, introduced in Python 3.8, to produce a shell-escaped string
# to protect against injection vulnerabilities (e.g., users copy/pasting the output here).
# https://github.com/phylum-dev/phylum-ci/issues/18
print(f" [!] There was a problem creating the project with command: {' '.join(cmd)}")
ret.check_returncode()


def get_phylum_analysis(ci_env: CIBase) -> dict:
"""Analyze a project lockfile from a given CI environment with the phylum CLI and return the analysis."""
# Build up the analyze command based on the provided inputs
cmd = f"{ci_env.cli_path} analyze -l {ci_env.phylum_label}"
cmd = [str(ci_env.cli_path), "analyze", "-l", ci_env.phylum_label]
if ci_env.phylum_project:
cmd = f"{cmd} --project {ci_env.phylum_project}"
cmd.extend(["--project", ci_env.phylum_project])
# A group can not be specified without a project
if ci_env.phylum_group:
cmd = f"{cmd} --group {ci_env.phylum_group}"
cmd = f"{cmd} --verbose --json {ci_env.lockfile}"
cmd.extend(["--group", ci_env.phylum_group])
cmd.extend(["--verbose", "--json", str(ci_env.lockfile)])

print(f" [*] Performing analysis with command: {cmd} ...")
# TODO: Use the `shlex.join` method, introduced in Python 3.8, to produce a shell-escaped string
# to protect against injection vulnerabilities (e.g., users copy/pasting the output here).
# https://github.com/phylum-dev/phylum-ci/issues/18
maxrake marked this conversation as resolved.
Show resolved Hide resolved
print(f" [*] Performing analysis with command: {' '.join(cmd)} ...")
maxrake marked this conversation as resolved.
Show resolved Hide resolved
try:
analysis_result = subprocess.run(shlex.split(cmd), check=True, capture_output=True, text=True).stdout
analysis_result = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout
except subprocess.CalledProcessError as err:
# The Phylum project can set the CLI to "fail the build" if threshold requirements are not met.
# This causes the return code to be non-zero and lands us here. Check for this case to proceed.
Expand Down Expand Up @@ -183,15 +188,11 @@ def get_args(args: Optional[Sequence[str]] = None) -> Tuple[argparse.Namespace,
analysis_group.add_argument(
"-p",
"--project",
# NOTE: The method of using the shlex module is known to be compatible with UNIX shells.
# It may not function as desired for other operating systems and/or shell types.
type=shlex.quote,
help="Name of a Phylum project to create and use to perform the analysis.",
)
analysis_group.add_argument(
"-g",
"--group",
type=shlex.quote,
help="Optional group name, which will be the owner of the project. Only used when a project is also specified.",
)

Expand Down
16 changes: 8 additions & 8 deletions src/phylum/init/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ def get_expected_phylum_bin_path():

def get_phylum_cli_version(cli_path: Path) -> str:
"""Get the version of the installed and active Phylum CLI and return it."""
cmd = f"{cli_path} --version"
version = subprocess.run(cmd.split(), check=True, capture_output=True, text=True).stdout.strip().lower()
cmd = [str(cli_path), "--version"]
version = subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip().lower()

# Starting with Python 3.9, the str.removeprefix() method was introduced to do this same thing
prefix = "phylum "
Expand Down Expand Up @@ -269,7 +269,7 @@ def setup_token(token):
# The phylum CLI settings.yaml file won't exist upon initial install
# but running a command will trigger the CLI to generate it
if not phylum_settings_path.exists():
cmd = f"{phylum_bin_path} version".split()
cmd = [str(phylum_bin_path), "version"]
subprocess.run(cmd, check=True)

yaml = YAML()
Expand All @@ -280,7 +280,7 @@ def setup_token(token):
yaml.dump(settings_dict, f)

# Check that the token was setup correctly by using it to display the current auth status
cmd = f"{phylum_bin_path} auth status".split()
cmd = [str(phylum_bin_path), "auth", "status"]
subprocess.run(cmd, check=True)


Expand Down Expand Up @@ -395,18 +395,18 @@ def main(args=None):
# Reference: https://github.com/phylum-dev/cli/pull/671
if args.global_install:
# Current assumptions for this method:
# * the /usr/local/bin directory exists, has proper permissions, is on the PATH for all users
# * the /usr/local/bin directory exists, has proper permissions, and is on the PATH for all users
# * the install is on a system with glibc
cmd = "install -m 0755 phylum /usr/local/bin/phylum".split()
cmd = ["install", "-m", "0755", "phylum", "/usr/local/bin/phylum"]
else:
cmd = "sh install.sh".split()
cmd = ["sh", "install.sh"]
subprocess.run(cmd, check=True, cwd=extracted_dir)

process_token_option(args)

# Check to ensure everything is working
phylum_bin_path, _ = get_phylum_bin_path()
cmd = f"{phylum_bin_path} --help".split()
cmd = [str(phylum_bin_path), "--help"]
subprocess.run(cmd, check=True)

return 0
Expand Down