From 6209c9e5ee852224f509bdeccbc5c27bc9d21441 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Tue, 19 Nov 2024 08:42:36 +0100 Subject: [PATCH] Various improvements for v0.10 (#59) * Bash euo pipefail in template * Close #8. Improve logging message for input missing warning. * Update CHANGELOG * Improve logging for missing parameters in `dvc.yaml` during `get-config` * Fix test * Fix tests * Code comments about missing_path_warnings --- CHANGELOG.md | 3 +++ src/dso/compile_config.py | 31 ++++++++++++++++++++++----- src/dso/get_config.py | 4 ++-- src/dso/templates/stage/bash/dvc.yaml | 2 +- tests/test_compile_config.py | 5 ++++- 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce915c1..e4af89a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,10 +15,13 @@ and this project adheres to [Semantic Versioning][]. - Add `.gitignore` catch-all clauses for `output` and `report` folders in stages to not pick up data and repots being tracked via git ([#46](https://github.com/Boehringer-Ingelheim/dso/issues/46)). - Every dso project is now also a [uv project](https://docs.astral.sh/uv/concepts/projects/#building-projects) declaring Python dependencies in `pyproject.toml`. This makes it possible to use a different dso version per project and makes it easy to work with virtual Python environments ([#52](https://github.com/Boehringer-Ingelheim/dso/pull/52)) +- bash templates now include `-euo pipefail` settings, ensuring that stages fail early and return a nonzero error code if something failed ([#59](https://github.com/Boehringer-Ingelheim/dso/pull/59)). ### Fixes - Remove vendored `hiyapyco` code since required changes were released upstream in v0.7.0 ([#45](https://github.com/Boehringer-Ingelheim/dso/pull/45)). - `None` values in `params.in.yaml` can now be used to override anything, e.g. to disable watermarking only in a specific stage ([#45](https://github.com/Boehringer-Ingelheim/dso/pull/45)). +- Improve logging for "missing path" warning during `compile-config` ([#59](https://github.com/Boehringer-Ingelheim/dso/pull/59)). +- Improve logging for missing parameters in `dvc.yaml` during `get-config` ([#59](https://github.com/Boehringer-Ingelheim/dso/pull/59)). ## v0.9.0 diff --git a/src/dso/compile_config.py b/src/dso/compile_config.py index 64c05e1..d38605f 100644 --- a/src/dso/compile_config.py +++ b/src/dso/compile_config.py @@ -13,7 +13,7 @@ from ruamel.yaml import YAML, yaml_object from ._logging import log -from ._util import _find_in_parent, check_project_roots +from ._util import _find_in_parent, check_project_roots, get_project_root PARAMS_YAML_DISCLAIMER = dedent( """\ @@ -29,7 +29,11 @@ ) -def _load_yaml_with_auto_adjusting_paths(yaml_stream: TextIOWrapper, destination: Path): +def _load_yaml_with_auto_adjusting_paths( + yaml_stream: TextIOWrapper, + destination: Path, + missing_path_warnings: set[tuple[Path, Path]], +): """ Load a yaml file and adjust paths for all !path objects based on a destination file @@ -39,6 +43,9 @@ def _load_yaml_with_auto_adjusting_paths(yaml_stream: TextIOWrapper, destination Path of the yaml file to load destination Path to which the file shall be adjusted + missing_path_warnings + set in which we keep track of warnings for missing paths to ensure we are + not emitting the same warning twice. Returns ------- @@ -48,6 +55,9 @@ def _load_yaml_with_auto_adjusting_paths(yaml_stream: TextIOWrapper, destination # The folder of the source config file source = Path(yaml_stream.name).parent + # stage name for logging purposes only + stage = source.relative_to(get_project_root(source)) + if not destination.is_relative_to(source): raise ValueError("Destination path can be the same as source, or a child thereof.") @@ -66,8 +76,11 @@ class AutoAdjustingPathWithLocation(str): def __init__(self, path: str): self.path = Path(path) if not (source / self.path).exists(): - # Warn, but do not fail (it could also be an output path to be populated by a dvc stage) - log.warning(f"Path {self.path} does not exist!") + # check if warning for the same path was already emitted + if (self.path, source) not in missing_path_warnings: + # Warn, but do not fail (it could also be an output path to be populated by a dvc stage) + log.warning(f"Path {self.path} in stage {stage} does not exist!") + missing_path_warnings.add((self.path, source)) def get_adjusted(self): # not possible with pathlib, because pathlib requires the paths to be subpaths of each other @@ -143,6 +156,10 @@ def compile_all_configs(paths: Sequence[Path]): all_configs = _get_list_of_configs_to_compile(paths, project_root) log.info(f"Compiling a total of {len(all_configs)} config files.") + # keep track of paths for which we emitted a warning that the path doesn't exist to ensure + # we are only emitting one warning for each (file path, source yaml file path) + missing_path_warnings: set[tuple[Path, Path]] = set() + for config in all_configs: # sorted sorts path from parent to child. This is what we want as hyapyco gives precedence to configs # later in the list. @@ -152,7 +169,11 @@ def compile_all_configs(paths: Sequence[Path]): method=hiyapyco.METHOD_MERGE, none_behavior=hiyapyco.NONE_BEHAVIOR_OVERRIDE, interpolate=True, - loader_callback=partial(_load_yaml_with_auto_adjusting_paths, destination=config.parent), + loader_callback=partial( + _load_yaml_with_auto_adjusting_paths, + destination=config.parent, + missing_path_warnings=missing_path_warnings, + ), ) # an empty configuration should actually be an empty dictionary. if conf is None: diff --git a/src/dso/get_config.py b/src/dso/get_config.py index 924ed1c..a4074f8 100644 --- a/src/dso/get_config.py +++ b/src/dso/get_config.py @@ -148,6 +148,6 @@ def cli(stage, all, skip_compile): out_config = get_config(stage, all=all, skip_compile=skip_compile) yaml = YAML() yaml.dump(out_config, sys.stdout) - except KeyError: - log.error("dvc.yaml defines parameter that is not in params.yaml") + except KeyError as e: + log.error(f"dvc.yaml defines parameter {e} that is not in params.yaml") sys.exit(1) diff --git a/src/dso/templates/stage/bash/dvc.yaml b/src/dso/templates/stage/bash/dvc.yaml index d0ceeb1..1f31f9e 100644 --- a/src/dso/templates/stage/bash/dvc.yaml +++ b/src/dso/templates/stage/bash/dvc.yaml @@ -6,7 +6,7 @@ stages: - output cmd: - | - bash << EOF + bash -euo pipefail << EOF # add bash code here diff --git a/tests/test_compile_config.py b/tests/test_compile_config.py index 6586af4..2c1f616 100644 --- a/tests/test_compile_config.py +++ b/tests/test_compile_config.py @@ -34,6 +34,7 @@ def test_auto_adjusting_path(tmp_path, interpolate): as an object that can be dumped by ruamel using the custom representer. """ test_file = tmp_path / "params.in.yaml" + (tmp_path / ".git").mkdir() destination = tmp_path / "subproject1" / "stageA" destination.mkdir(parents=True) (tmp_path / "test.txt").touch() # create fake file, otherwise check for missing file will fail. @@ -51,7 +52,9 @@ def test_auto_adjusting_path(tmp_path, interpolate): str(test_file), method=hiyapyco.METHOD_MERGE, interpolate=interpolate, - loader_callback=partial(_load_yaml_with_auto_adjusting_paths, destination=destination), + loader_callback=partial( + _load_yaml_with_auto_adjusting_paths, destination=destination, missing_path_warnings=set() + ), ) ruamel = YAML()