From 88572c41bc48b9bf97e45a8585a3b23b29bcddcc Mon Sep 17 00:00:00 2001 From: Kurt von Laven Date: Sun, 5 Mar 2023 21:53:45 -0800 Subject: [PATCH] Support mixing remote and local configs in EXTENDS EXTENDS accepts both absolute URLs and relative file paths to config files to inherit from. Stop assuming that all relative file paths are relative to the current workspace. This assumption does not hold when parsing a config inherited from a different repository. This situation arises most simply when A inherits from B via an absolute URL, and B inherits from C via a relative file path. Both inherited config files, B and C, are in a different repository than A. Make a best effort to infer the URL of the repository root, and use that to correctly resolve relative file paths within that repository recursively. --- .github/linters/.cspell.json | 8 ++++ CHANGELOG.md | 3 ++ megalinter/config.py | 73 ++++++++++++++++++++++++++++++------ 3 files changed, 72 insertions(+), 12 deletions(-) diff --git a/.github/linters/.cspell.json b/.github/linters/.cspell.json index f9e80d7af60..cafb66375c8 100644 --- a/.github/linters/.cspell.json +++ b/.github/linters/.cspell.json @@ -501,6 +501,7 @@ "cmidrule", "codacy", "codebases", + "codeberg", "codeclimate", "codecov", "codenarcargs", @@ -680,6 +681,8 @@ "gijsreyn", "gitattributes", "gitblame", + "gitea", + "gitee", "gitlab", "gitleaks", "gitmodified", @@ -733,6 +736,7 @@ "htmlhint", "htmlhintrc", "htmlout", + "huggingface", "hyhs", "idiv", "ighe", @@ -777,6 +781,7 @@ "joereynolds", "jscoverage", "jscpd", + "jsdelivr", "jsonify", "jsonlint", "jsonlintrc", @@ -1003,6 +1008,7 @@ "packagename", "pagebreak", "pageref", + "pagure", "pandoc", "parallelization", "paren", @@ -1016,6 +1022,7 @@ "perlcriticrc", "pgfpicture", "phar", + "phcdn", "phive", "phpcs", "phplint", @@ -1132,6 +1139,7 @@ "returncode", "returnrules", "rexec", + "rhodecode", "risd", "rmfamily", "rockspec", diff --git a/CHANGELOG.md b/CHANGELOG.md index a39b5dde09f..031c08cfcf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ Note: Can be used with `oxsecurity/megalinter@beta` in your GitHub Action mega-l - Upgrade create-pull-request and create-or-update-comment GitHub Actions - Increase auto-update-linters GitHub Action timeout - Upgrade base Docker image to python:3.11.3-alpine3.17 + - Fix a config inheritance bug that prevented extending a remote config that + extends a local config by @Kurt-von-Laven + ([#2371](https://github.com/oxsecurity/megalinter/issues/2371)). - Documentation diff --git a/megalinter/config.py b/megalinter/config.py index 35b03b565d5..a1498eefc4c 100644 --- a/megalinter/config.py +++ b/megalinter/config.py @@ -4,6 +4,10 @@ import os import shlex import tempfile +from collections.abc import Mapping, Sequence +from pathlib import Path, PurePath +from typing import AnyStr, cast +from urllib.parse import ParseResult, urlparse, urlunparse import requests import yaml @@ -11,6 +15,11 @@ CONFIG_DATA = None CONFIG_SOURCE = None +JsonValue = ( + None | bool | int | float | str | Sequence["JsonValue"] | Mapping[str, "JsonValue"] +) +JsonObject = dict[str, JsonValue] + def init_config(workspace=None): global CONFIG_DATA, CONFIG_SOURCE @@ -72,7 +81,7 @@ def init_config(workspace=None): ) # manage EXTENDS in configuration if "EXTENDS" in runtime_config: - combined_config = {} + combined_config: JsonObject = {} CONFIG_SOURCE = combine_config( workspace, runtime_config, combined_config, CONFIG_SOURCE ) @@ -82,22 +91,32 @@ def init_config(workspace=None): set_config(runtime_config) -def combine_config(workspace, config, combined_config, config_source): - extends = config["EXTENDS"] +def combine_config( + workspace: str | None, + config: JsonObject, + combined_config: JsonObject, + config_source: str, + child_uri: ParseResult | None = None, +) -> str: + workspace_path = Path(workspace) if workspace else None + parsed_uri: ParseResult | None = None + extends = cast(str | Sequence[str], config["EXTENDS"]) if isinstance(extends, str): extends = extends.split(",") for extends_item in extends: if extends_item.startswith("http"): - r = requests.get(extends_item, allow_redirects=True) - assert ( - r.status_code == 200 - ), f"Unable to retrieve EXTENDS config file {extends_item}" - extends_config_data = yaml.safe_load(r.content) + parsed_uri = urlparse(extends_item) + extends_config_data = download_config(extends_item) else: - with open( - workspace + os.path.sep + extends_item, "r", encoding="utf-8" - ) as f: - extends_config_data = yaml.safe_load(f) + path = PurePath(extends_item) + if child_uri: + parsed_uri = resolve_uri(child_uri, path) + uri = urlunparse(parsed_uri) + extends_config_data = download_config(uri) + else: + resolved_path = workspace_path / path if workspace_path else Path(path) + with resolved_path.open("r", encoding="utf-8") as f: + extends_config_data = yaml.safe_load(f) combined_config.update(extends_config_data) config_source += f"\n[config] - extends from: {extends_item}" if "EXTENDS" in extends_config_data: @@ -106,11 +125,41 @@ def combine_config(workspace, config, combined_config, config_source): extends_config_data, combined_config, config_source, + parsed_uri, ) combined_config.update(config) return config_source +def download_config(uri: AnyStr) -> JsonObject: + r = requests.get(uri, allow_redirects=True) + assert r.status_code == 200, f"Unable to retrieve EXTENDS config file {uri!r}" + return yaml.safe_load(r.content) + + +def resolve_uri(child_uri: ParseResult, relative_config_path: PurePath) -> ParseResult: + match child_uri.netloc: + case "cdn.jsdelivr.net" | "git.launchpad.net": + repo_root_index = 3 + case "code.rhodecode.com" | "git.savannah.gnu.org" | "raw.githubusercontent.com" | "repo.or.cz": + repo_root_index = 4 + case "bitbucket.org" | "git.sr.ht" | "gitee.com" | "pagure.io": + repo_root_index = 5 + case "codeberg.org" | "gitea.com" | "gitlab.com" | "huggingface.co" | "p.phcdn.net" | "sourceforge.net": + repo_root_index = 6 + case _: + message = ( + f"Unsupported Git repo hosting service: {child_uri.netloc}. " + "Request support be added to MegaLinter, or use absolute URLs " + "with EXTENDS in inherited configs rather than relative paths." + ) + raise ValueError(message) + child_path = PurePath(child_uri.path) + repo_root_path = child_path.parts[:repo_root_index] + path = PurePath(*repo_root_path, str(relative_config_path)) + return child_uri._replace(path=str(path)) + + def get_config(): global CONFIG_DATA if CONFIG_DATA is not None: