From d00f518d0e7f386c418442878d913f907c0ed46c Mon Sep 17 00:00:00 2001 From: Jules Bertrand <33326907+julesbertrand@users.noreply.github.com> Date: Fri, 14 Jun 2024 08:54:16 +0200 Subject: [PATCH] enh: add yaml support (#193) --- README.md | 38 +++++++++++++++++++------------------- deployer/utils/config.py | 25 +++++++++++++++++++++++++ docs/CLI_REFERENCE.md | 2 +- docs/configuration.md | 24 ++++++++++++++++++++++++ mkdocs.yml | 2 ++ 5 files changed, 71 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index a18c7d4..9b1ce21 100644 --- a/README.md +++ b/README.md @@ -238,15 +238,12 @@ def pipeline(): #### Configs -Config file can be either `.py`, `.json` or `.toml` files. +Config files can be in `.py`, `.json`, `.toml` or `.yaml` format. They must be located in the `config/{pipeline_name}` folder. -!!! question "Why not YAML?" - YAML is not supported yet. Feel free to open a PR if you want to add it. - **Why multiple formats?** -`.py` files are useful to define complex configs (e.g. a list of dicts) while `.json` / `.toml` files are useful to define simple configs (e.g. a string). +`.py` files are useful to define complex configs (e.g. a list of dicts) while `.json` / `.toml` / `.yaml` files are useful to define simple configs (e.g. a string). It also adds flexibility to the user and allows you to use the deployer with almost no migration cost. **How to format them?** @@ -262,20 +259,23 @@ It also adds flexibility to the user and allows you to use the deployer with alm Section names will be joined using `"_"` separator and this is not configurable at the moment. 
Example: -=== "TOML file" - ```toml - [modeling] - model_name = "my-model" - params = { lambda = 0.1 } - ``` - -=== "Resulting parameter values" - ```python - { - "modeling_model_name": "my-model", - "modeling_params": { "lambda": 0.1 } - } - ``` + === "TOML file" + ```toml + [modeling] + model_name = "my-model" + params = { lambda = 0.1 } + ``` + + === "Resulting parameter values" + ```python + { + "modeling_model_name": "my-model", + "modeling_params": { "lambda": 0.1 } + } + ``` + +- `.yaml` files must be valid yaml files containing only one dict of key: value representing parameter values. + ??? question "Why are sections flattened when using TOML config files?" Vertex Pipelines parameter validation and parameter logging to Vertex Experiments are based on the parameter name. If you do not flatten your sections, you'll only be able to validate section names and that they should be of type `dict`. diff --git a/deployer/utils/config.py b/deployer/utils/config.py index eb3392a..fd8f0e1 100644 --- a/deployer/utils/config.py +++ b/deployer/utils/config.py @@ -5,6 +5,7 @@ from typing import List, Optional, Tuple, Union import tomlkit.items +import yaml from loguru import logger from pydantic import ValidationError from pydantic_settings import BaseSettings, SettingsConfigDict @@ -83,6 +84,7 @@ class ConfigType(str, Enum): # noqa: D101 json = "json" py = "py" toml = "toml" + yaml = "yaml" def list_config_filepaths(configs_root_path: Path, pipeline_name: str) -> List[Path]: @@ -133,6 +135,10 @@ def load_config(config_filepath: Path) -> Tuple[Optional[dict], Optional[dict]]: parameter_values = _load_config_toml(config_filepath) return parameter_values, None + if config_filepath.suffix == ".yaml": + parameter_values = _load_config_yaml(config_filepath) + return parameter_values, None + if config_filepath.suffix == ".py": parameter_values, input_artifacts = _load_config_python(config_filepath) return parameter_values, input_artifacts @@ -220,3 +226,22 @@ def 
flatten_toml_document( ) from e return parameter_values + + +def _load_config_yaml(config_filepath: Path) -> dict: + """Load the parameter values from a YAML config file. + + Args: + config_filepath (Path): A `Path` object representing the path to the config file. + + Returns: + dict: The loaded parameter values. + """ + with open(config_filepath, "r", encoding="utf-8") as f: + try: + parameter_values = yaml.safe_load(f) + except yaml.YAMLError as e: + raise BadConfigError( + f"{config_filepath}: invalid YAML config file.\n{e.__class__.__name__}: {e}" + ) from e + return parameter_values diff --git a/docs/CLI_REFERENCE.md b/docs/CLI_REFERENCE.md index e59a43d..53cb2e4 100644 --- a/docs/CLI_REFERENCE.md +++ b/docs/CLI_REFERENCE.md @@ -91,7 +91,7 @@ $ vertex-deployer create [OPTIONS] PIPELINE_NAMES... **Options**: -* `-ct, --config-type [json|py|toml]`: The type of the config to create. [default: py] +* `-ct, --config-type [json|py|toml|yaml]`: The type of the config to create. [default: py] * `--help`: Show this message and exit. ## `vertex-deployer deploy` diff --git a/docs/configuration.md b/docs/configuration.md index ca4b20f..9366a93 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -36,6 +36,7 @@ The choice of format depends on the complexity and requirements of the configura Python files allow for complex configurations and dynamic values, while JSON and TOML files are more suitable for static and simple configurations. For example, you have here the same config file in the three formats: + === "JSON" ```json title="vertex/configs/dummy_pipeline/config_test.json" { @@ -77,6 +78,29 @@ For example, you have here the same config file in the three formats: Then, these sections are flattened, except for inline dicts, leading to slightly different parameter names (e.g., `modeling_grid_search_lambda` instead of `lambda`). 
+ +=== "YAML" + ```yaml title="vertex/configs/dummy_pipeline/config_test.yaml" + model_name: my-model + default_params: + lambda: 0.1 + alpha: hello world + grid_search: + lambda: + - 0.1 + - 0.2 + - 0.3 + alpha: + - hello world + - goodbye world + cv: 3 + ``` + + YAML config files are similar to TOML files in terms of flexibility and verbosity. + + They are more human-readable than TOML files, but they are also more error-prone due to indentation. + + === "Python" ```python title="vertex/configs/dummy_pipeline/config_test.py" parameter_values = { diff --git a/mkdocs.yml b/mkdocs.yml index 46d1dcc..ce32567 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -37,6 +37,8 @@ markdown_extensions: - pymdownx.details - pymdownx.snippets - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true extra_css: - stylesheets/extra.css