Skip to content

Commit

Permalink
Various improvements for v0.10 (#59)
Browse files Browse the repository at this point in the history
* Bash euo pipefail in template

* Close #8. Improve logging message for input missing warning.

* Update CHANGELOG

* Improve logging for missing parameters in `dvc.yaml` during `get-config`

* Fix test

* Fix tests

* Code comments about missing_path_warnings
  • Loading branch information
grst authored Nov 19, 2024
1 parent 87b6338 commit 6209c9e
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 9 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@ and this project adheres to [Semantic Versioning][].
- Add `.gitignore` catch-all clauses for `output` and `report` folders in stages to not pick up data and repots being tracked via git ([#46](https://github.com/Boehringer-Ingelheim/dso/issues/46)).
- Every dso project is now also a [uv project](https://docs.astral.sh/uv/concepts/projects/#building-projects) declaring Python dependencies in `pyproject.toml`. This makes it possible to
use a different dso version per project and makes it easy to work with virtual Python environments ([#52](https://github.com/Boehringer-Ingelheim/dso/pull/52))
- bash templates now include `-euo pipefail` settings, ensuring that stages fail early and return a nonzero error code if something failed ([#59](https://github.com/Boehringer-Ingelheim/dso/pull/59)).

### Fixes
- Remove vendored `hiyapyco` code since required changes were released upstream in v0.7.0 ([#45](https://github.com/Boehringer-Ingelheim/dso/pull/45)).
- `None` values in `params.in.yaml` can now be used to override anything, e.g. to disable watermarking only in a specific stage ([#45](https://github.com/Boehringer-Ingelheim/dso/pull/45)).
- Improve logging for "missing path" warning during `compile-config` ([#59](https://github.com/Boehringer-Ingelheim/dso/pull/59)).
- Improve logging for missing parameters in `dvc.yaml` during `get-config` ([#59](https://github.com/Boehringer-Ingelheim/dso/pull/59)).

## v0.9.0

Expand Down
31 changes: 26 additions & 5 deletions src/dso/compile_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ruamel.yaml import YAML, yaml_object

from ._logging import log
from ._util import _find_in_parent, check_project_roots
from ._util import _find_in_parent, check_project_roots, get_project_root

PARAMS_YAML_DISCLAIMER = dedent(
"""\
Expand All @@ -29,7 +29,11 @@
)


def _load_yaml_with_auto_adjusting_paths(yaml_stream: TextIOWrapper, destination: Path):
def _load_yaml_with_auto_adjusting_paths(
yaml_stream: TextIOWrapper,
destination: Path,
missing_path_warnings: set[tuple[Path, Path]],
):
"""
Load a yaml file and adjust paths for all !path objects based on a destination file
Expand All @@ -39,6 +43,9 @@ def _load_yaml_with_auto_adjusting_paths(yaml_stream: TextIOWrapper, destination
Path of the yaml file to load
destination
Path to which the file shall be adjusted
missing_path_warnings
set in which we keep track of warnings for missing paths to ensure we are
not emitting the same warning twice.
Returns
-------
Expand All @@ -48,6 +55,9 @@ def _load_yaml_with_auto_adjusting_paths(yaml_stream: TextIOWrapper, destination

# The folder of the source config file
source = Path(yaml_stream.name).parent
# stage name for logging purposes only
stage = source.relative_to(get_project_root(source))

if not destination.is_relative_to(source):
raise ValueError("Destination path can be the same as source, or a child thereof.")

Expand All @@ -66,8 +76,11 @@ class AutoAdjustingPathWithLocation(str):
def __init__(self, path: str):
self.path = Path(path)
if not (source / self.path).exists():
# Warn, but do not fail (it could also be an output path to be populated by a dvc stage)
log.warning(f"Path {self.path} does not exist!")
# check if warning for the same path was already emitted
if (self.path, source) not in missing_path_warnings:
# Warn, but do not fail (it could also be an output path to be populated by a dvc stage)
log.warning(f"Path {self.path} in stage {stage} does not exist!")
missing_path_warnings.add((self.path, source))

def get_adjusted(self):
# not possible with pathlib, because pathlib requires the paths to be subpaths of each other
Expand Down Expand Up @@ -143,6 +156,10 @@ def compile_all_configs(paths: Sequence[Path]):
all_configs = _get_list_of_configs_to_compile(paths, project_root)
log.info(f"Compiling a total of {len(all_configs)} config files.")

# keep track of paths for which we emitted a warning that the path doesn't exist to ensure
# we are only emitting one warning for each (file path, source yaml file path)
missing_path_warnings: set[tuple[Path, Path]] = set()

for config in all_configs:
# sorted sorts path from parent to child. This is what we want as hyapyco gives precedence to configs
# later in the list.
Expand All @@ -152,7 +169,11 @@ def compile_all_configs(paths: Sequence[Path]):
method=hiyapyco.METHOD_MERGE,
none_behavior=hiyapyco.NONE_BEHAVIOR_OVERRIDE,
interpolate=True,
loader_callback=partial(_load_yaml_with_auto_adjusting_paths, destination=config.parent),
loader_callback=partial(
_load_yaml_with_auto_adjusting_paths,
destination=config.parent,
missing_path_warnings=missing_path_warnings,
),
)
# an empty configuration should actually be an empty dictionary.
if conf is None:
Expand Down
4 changes: 2 additions & 2 deletions src/dso/get_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,6 @@ def cli(stage, all, skip_compile):
out_config = get_config(stage, all=all, skip_compile=skip_compile)
yaml = YAML()
yaml.dump(out_config, sys.stdout)
except KeyError:
log.error("dvc.yaml defines parameter that is not in params.yaml")
except KeyError as e:
log.error(f"dvc.yaml defines parameter {e} that is not in params.yaml")
sys.exit(1)
2 changes: 1 addition & 1 deletion src/dso/templates/stage/bash/dvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ stages:
- output
cmd:
- |
bash << EOF
bash -euo pipefail << EOF
# add bash code here
Expand Down
5 changes: 4 additions & 1 deletion tests/test_compile_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def test_auto_adjusting_path(tmp_path, interpolate):
as an object that can be dumped by ruamel using the custom representer.
"""
test_file = tmp_path / "params.in.yaml"
(tmp_path / ".git").mkdir()
destination = tmp_path / "subproject1" / "stageA"
destination.mkdir(parents=True)
(tmp_path / "test.txt").touch() # create fake file, otherwise check for missing file will fail.
Expand All @@ -51,7 +52,9 @@ def test_auto_adjusting_path(tmp_path, interpolate):
str(test_file),
method=hiyapyco.METHOD_MERGE,
interpolate=interpolate,
loader_callback=partial(_load_yaml_with_auto_adjusting_paths, destination=destination),
loader_callback=partial(
_load_yaml_with_auto_adjusting_paths, destination=destination, missing_path_warnings=set()
),
)

ruamel = YAML()
Expand Down

0 comments on commit 6209c9e

Please sign in to comment.