chore(release): 0.2.0 (#71)
julesbertrand authored Oct 6, 2023
2 parents 53b3372 + b997e69 commit 718200d
Showing 14 changed files with 229 additions and 40 deletions.
4 changes: 1 addition & 3 deletions CONTRIBUTING.md
@@ -58,7 +58,7 @@ and [Poetry](https://python-poetry.org/docs/cli/#build) to create releases and t
The release process is automated through GitHub Actions. Here is the process:

- Create a Pull Request from `develop` to `main`.
- Merge the Pull Request. This can be a merge commit or a squash and merge.
- Merge the Pull Request. This must create a merge commit.
- The merge will trigger the Release GitHub Action defined in [this workflow](.github/workflows/release.yaml).

The Release GitHub Action does the following:
@@ -69,8 +69,6 @@ The Release GitHub Action does the following:

The action is triggered by any push to main.

Here is the relevant part of the GitHub Action:

> [!NOTE]
> The release action will be triggered by any push to `main` only if the 'CI' job in the 'release.yaml' workflow succeeds.
> Python Semantic Release will take care of version number update, tag creation and release creation.
45 changes: 37 additions & 8 deletions README.md
@@ -39,6 +39,7 @@
- [`list`](#list)
- [CLI: Options](#cli-options)

[Full CLI documentation](docs/CLI_REFERENCE.md)

## Why this tool?

@@ -96,6 +97,15 @@ List available versions:
gsutil ls gs://vertex-pipelines-deployer
```

### Add to requirements

It's better to download the `.tar.gz` archive from GCS and keep it under version control.

Then add the following line to your `requirements.in` file:
```bash
file:my/path/to/vertex_deployer-$VERSION.tar.gz
```
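
For example, a minimal sketch of that workflow (the release version and the local `libs/` path are illustrative; only the bucket name comes from the listing above):

```bash
# Download a specific release from the GCS bucket and keep it under version control
gsutil cp gs://vertex-pipelines-deployer/vertex_deployer-0.2.0.tar.gz libs/

# Pin it in requirements.in
echo "file:libs/vertex_deployer-0.2.0.tar.gz" >> requirements.in
```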

## Usage

### Setup
@@ -164,7 +174,7 @@ gcloud artifacts repositories add-iam-policy-binding ${GAR_PIPELINES_REPO_ID} \
--role="roles/artifactregistry.admin"
```

You can use the deployer CLI (see example below) or import [`VertexPipelineDeployer`](deployer/deployer.py) in your code (try it yourself).
You can use the deployer CLI (see example below) or import [`VertexPipelineDeployer`](deployer/pipeline_deployer.py) in your code (try it yourself).

### Folder Structure

@@ -186,28 +196,43 @@ vertex
#### Pipelines

Your file `{pipeline_name}.py` must contain a function called `pipeline` decorated using `kfp.dsl.pipeline`.
Your file `{pipeline_name}.py` must contain a function called `{pipeline_name}` decorated using `kfp.dsl.pipeline`.
In previous versions, the function used to be called `pipeline`, but it was renamed to `{pipeline_name}` to avoid confusion with the `kfp.dsl.pipeline` decorator.

```python
# vertex/pipelines/dummy_pipeline.py
import kfp.dsl

# New name to avoid confusion with the kfp.dsl.pipeline decorator
@kfp.dsl.pipeline()
def dummy_pipeline():
...

# Old name
@kfp.dsl.pipeline()
def pipeline():
...
```

#### Configs

Config files can be either `.py` files or `.json` files.
Config files can be `.py`, `.json`, or `.toml` files.
They must be located in the `config/{pipeline_name}` folder.

**Why different formats?**

`.py` files are useful to define complex configs (e.g. a list of dicts) while `.json` files are useful to define simple configs (e.g. a string).
`.py` files are useful to define complex configs (e.g. a list of dicts) while `.json` / `.toml` files are useful to define simple configs (e.g. a string).

**How to format them?**
- `.json` files must be valid json files containing only one dict of key: value.
- `.json` and `.toml` files must be valid files of their respective format, containing a single dict of key-value pairs representing parameter values.
- `.py` files must be valid python files with two important elements:
- `parameter_values` to pass arguments to your pipeline
- `input_artifacts` if you want to retrieve and create input artifacts for your pipeline.
See [Vertex Documentation](https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.PipelineJob) for more information.
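
As an illustration, a minimal `.py` config for the `dummy_pipeline` above might look as follows (the parameter names and artifact resource name are hypothetical, not part of the project; a `.toml` config would carry the same parameters as top-level keys):

```python
# config/dummy_pipeline/config_illustration.py  (path follows the convention above)

# Arguments passed to the pipeline function.
parameter_values = {
    "input_table": "my-project.my_dataset.my_table",  # hypothetical parameter
    "n_estimators": 100,  # hypothetical parameter
}

# Optional: input artifacts to resolve for the pipeline
# (see the Vertex PipelineJob documentation linked above).
input_artifacts = {
    "model": "projects/my-project/locations/europe-west1/metadataStores/default/artifacts/123",
}
```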

**How to name them?**

`{config_name}.json` or `{config_name}.py`. config_name is free but must be unique for a given pipeline.
`{config_name}.py`, `{config_name}.json`, or `{config_name}.toml`. The config name is up to you, but it must be unique for a given pipeline.


#### Settings
@@ -300,7 +325,9 @@ vertex-deployer --log-level DEBUG deploy ...
├─ .github
│ ├─ ISSUE_TEMPLATE/
│ ├─ workflows
│ │ └─ ci.yaml
│ │ ├─ ci.yaml
│ │ ├─ pr_agent.yaml
│ │ └─ release.yaml
│ ├─ CODEOWNERS
│ └─ PULL_REQUEST_TEMPLATE.md
├─ deployer
@@ -325,7 +352,9 @@ vertex-deployer --log-level DEBUG deploy ...
│ │ ├─ broken_pipeline
│ │ │ └─ config_test.json
│ │ └─ dummy_pipeline
│ │ └─ config_test.json
│ │ ├─ config_test.json
│ │ ├─ config.py
│ │ └─ config.toml
│ ├─ deployment
│ ├─ lib
│ └─ pipelines
2 changes: 1 addition & 1 deletion deployer/cli.py
@@ -272,7 +272,7 @@ def check(
Checking that a pipeline is valid includes:
* Checking that the pipeline can be imported. It must be a valid python module with a
`pipeline` function decorated with `@kfp.dsl.pipeline`.
`{pipeline_name}` function decorated with `@kfp.dsl.pipeline`.
* Checking that the pipeline can be compiled using `kfp.compiler.Compiler`.
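A usage sketch of this command (the exact argument syntax is an assumption here; see the CLI reference linked in the README for the authoritative form):

```bash
# Hypothetical invocation: import, compile, and validate all configs
# for the pipeline named `dummy_pipeline`.
vertex-deployer check dummy_pipeline
```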
44 changes: 31 additions & 13 deletions deployer/pipeline_checks.py
@@ -5,6 +5,7 @@
from loguru import logger
from pydantic import Field, ValidationError, computed_field, model_validator
from pydantic.functional_validators import ModelWrapValidatorHandler
from pydantic_core import PydanticCustomError
from typing_extensions import Annotated

from deployer.constants import (
@@ -14,6 +15,7 @@
)
from deployer.pipeline_deployer import VertexPipelineDeployer
from deployer.utils.config import list_config_filepaths, load_config
from deployer.utils.exceptions import BadConfigError
from deployer.utils.logging import disable_logger
from deployer.utils.models import CustomBaseModel, create_model_from_pipeline
from deployer.utils.utils import (
@@ -26,10 +28,29 @@
PipelineName = make_enum_from_python_package_dir(PIPELINE_ROOT_PATH)


class DynamicConfigsModel(CustomBaseModel, Generic[PipelineConfigT]):
class ConfigDynamicModel(CustomBaseModel, Generic[PipelineConfigT]):
"""Model used to generate checks for configs based on pipeline dynamic model"""

configs: Dict[str, PipelineConfigT]
config_path: Path
config: PipelineConfigT

@model_validator(mode="before")
@classmethod
def load_config_if_empty(cls, data: Any) -> Any:
"""Load config if it is empty"""
if data.get("config") is None:
try:
parameter_values, input_artifacts = load_config(data["config_path"])
except BadConfigError as e:
raise PydanticCustomError("BadConfigError", str(e)) from e
data["config"] = {**(parameter_values or {}), **(input_artifacts or {})}
return data


class ConfigsDynamicModel(CustomBaseModel, Generic[PipelineConfigT]):
"""Model used to generate checks for configs based on pipeline dynamic model"""

configs: Dict[str, ConfigDynamicModel[PipelineConfigT]]


class Pipeline(CustomBaseModel):
@@ -49,15 +70,12 @@ def populate_config_names(cls, data: Any) -> Any:
@computed_field
def pipeline(self) -> Any:
"""Import pipeline"""
with disable_logger("deployer.utils.utils"):
return import_pipeline_from_dir(PIPELINE_ROOT_PATH, self.pipeline_name.value)

@computed_field()
def configs(self) -> Any:
"""Load configs"""
configs = [load_config(config_path) for config_path in self.config_paths]
configs = [{**(pv or {}), **(ia or {})} for pv, ia in configs]
return configs
if getattr(self, "_pipeline", None) is None:
with disable_logger("deployer.utils.utils"):
self._pipeline = import_pipeline_from_dir(
PIPELINE_ROOT_PATH, self.pipeline_name.value
)
return self._pipeline

@model_validator(mode="after")
def import_pipeline(self):
@@ -89,9 +107,9 @@ def validate_configs(self):
"""Validate configs against pipeline parameters definition"""
logger.debug(f"Validating configs for pipeline {self.pipeline_name.value}")
PipelineDynamicModel = create_model_from_pipeline(self.pipeline)
ConfigsModel = DynamicConfigsModel[PipelineDynamicModel]
ConfigsModel = ConfigsDynamicModel[PipelineDynamicModel]
ConfigsModel.model_validate(
{"configs": dict(zip([x.name for x in self.config_paths], self.configs))}
{"configs": {x.name: {"config_path": x} for x in self.config_paths}}
)
return self

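For readers less familiar with the pattern used by `ConfigDynamicModel`, here is a small self-contained sketch (not the project's code; the field names and the JSON loader are assumptions) of a Pydantic `mode="before"` model validator that lazily populates a field from a path during validation:

```python
import json
from pathlib import Path
from typing import Any, Dict

from pydantic import BaseModel, model_validator


class LazyConfig(BaseModel):
    """Fills `config` from `config_path` when it is not provided explicitly."""

    config_path: Path
    config: Dict[str, Any]

    @model_validator(mode="before")
    @classmethod
    def load_config_if_empty(cls, data: Any) -> Any:
        # `data` is the raw input (here a dict) before field validation runs.
        if isinstance(data, dict) and data.get("config") is None:
            # Hypothetical loader: a JSON file holding a single dict of parameters.
            data["config"] = json.loads(Path(data["config_path"]).read_text())
        return data


# Only the path needs to be passed; the validator loads the file so that
# validation then runs against the loaded content, mirroring how
# ConfigsDynamicModel.model_validate is called with config paths above.
# LazyConfig.model_validate({"config_path": "config/dummy_pipeline/config_test.json"})
```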
12 changes: 9 additions & 3 deletions deployer/utils/config.py
@@ -4,10 +4,11 @@
from pathlib import Path
from typing import List, Optional, Tuple, Union

import toml
from pydantic import ValidationError
from pydantic_settings import BaseSettings, SettingsConfigDict

from deployer.utils.exceptions import UnsupportedConfigFileError
from deployer.utils.exceptions import BadConfigError, UnsupportedConfigFileError


class VertexPipelinesSettings(BaseSettings): # noqa: D101
@@ -39,6 +40,7 @@ def load_vertex_settings(env_file: Optional[Path] = None) -> VertexPipelinesSett
class ConfigType(str, Enum): # noqa: D101
json = "json"
py = "py"
toml = "toml"


def list_config_filepaths(config_root_path: Union[Path, str], pipeline_name: str) -> List[Path]:
Expand Down Expand Up @@ -85,6 +87,10 @@ def load_config(config_filepath: Path) -> Tuple[Optional[dict], Optional[dict]]:
parameter_values = json.load(f)
return parameter_values, None

if config_filepath.suffix == ".toml":
parameter_values = toml.load(config_filepath)
return parameter_values, None

if config_filepath.suffix == ".py":
parameter_values, input_artifacts = _load_config_python(config_filepath)
return parameter_values, input_artifacts
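
A minimal usage sketch of this loader (the config path is illustrative):

```python
# Hypothetical call site: load a TOML config and inspect the result.
from pathlib import Path

from deployer.utils.config import load_config

parameter_values, input_artifacts = load_config(
    Path("config/dummy_pipeline/config_test.toml")
)
# For .json and .toml files, input_artifacts is always None;
# only .py configs can also define an `input_artifacts` dict.
print(parameter_values)
```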
@@ -119,15 +125,15 @@ def _load_config_python(config_filepath: Path) -> Tuple[Optional[dict], Optional
input_artifacts = getattr(module, "input_artifacts", None)

if parameter_values is None and input_artifacts is None:
raise ValueError(
raise BadConfigError(
f"{config_filepath}: Python config file must contain a `parameter_values` "
"and/or `input_artifacts` dict."
)

if parameter_values is not None and input_artifacts is not None:
common_keys = set(parameter_values.keys()).intersection(set(input_artifacts.keys()))
if common_keys:
raise ValueError(
raise BadConfigError(
f"{config_filepath}: Python config file must not contain common keys in "
"`parameter_values` and `input_artifacts` dict. Common keys: {common_keys}"
)
4 changes: 4 additions & 0 deletions deployer/utils/exceptions.py
@@ -8,3 +8,7 @@ class MissingGoogleArtifactRegistryHostError(Exception):

class UnsupportedConfigFileError(Exception):
"""Raised when the config file is not supported."""


class BadConfigError(ValueError):
"""Raised when a config is invalid."""
1 change: 1 addition & 0 deletions deployer/utils/models.py
@@ -2,6 +2,7 @@
from typing import Literal

import kfp.components.graph_component
import kfp.dsl
from pydantic import BaseModel, ConfigDict, create_model
from typing_extensions import _AnnotatedAlias

29 changes: 19 additions & 10 deletions deployer/utils/utils.py
@@ -1,4 +1,5 @@
import importlib
import warnings
from enum import Enum
from pathlib import Path
from typing import Dict, Optional
@@ -40,11 +41,22 @@ def import_pipeline_from_dir(dirpath: Path, pipeline_name: str) -> graph_compone
) from e

try:
pipeline: Optional[graph_component.GraphComponent] = pipeline_module.pipeline
pipeline: Optional[graph_component.GraphComponent]
pipeline = getattr(pipeline_module, pipeline_name, None)
if pipeline is None:
pipeline = pipeline_module.pipeline
warnings.warn(
f"Pipeline in `{module_path}` is named `pipeline` instead of `{pipeline_name}`. "
"This is deprecated and will be removed in a future version. "
f"Please rename your pipeline to `{pipeline_name}`.",
FutureWarning,
stacklevel=1,
)
except AttributeError as e:
raise ImportError(
f"Pipeline {module_path}:pipeline not found. "
f"Pipeline object not found in `{module_path}`. "
"Please check that the pipeline is correctly defined and named."
f"It should be named `{pipeline_name}` or `pipeline` (deprecated)."
) from e

logger.debug(f"Pipeline {module_path} imported successfully.")
@@ -82,7 +94,7 @@ def print_pipelines_list(pipelines_dict: Dict[str, list], with_configs: bool = F
console.print(table)


def print_check_results_table(
def print_check_results_table( # noqa: C901
to_check: Dict[str, list], validation_error: Optional[ValidationError] = None
) -> None:
"""This function prints a table of check results to the console.
@@ -126,7 +138,6 @@ def print_check_results_table(
table.add_row(*row.model_dump().values(), style="bold yellow")

elif len(errors) == 1 and len(errors[0]["loc"]) == 2:
print(errors)
row = ChecksTableRow(
status="❌",
pipeline=pipeline_name,
@@ -140,19 +151,17 @@
error_rows = []
for error in errors:
if error["loc"][3] == config_filepath.name:
error_row = {
"type": error["type"],
"attribute": error["loc"][4],
"msg": error["msg"],
}
error_row = {"type": error["type"], "msg": error["msg"]}
if len(error["loc"]) > 4:
error_row["attribute"] = error["loc"][5]
error_rows.append(error_row)
if error_rows:
row = ChecksTableRow(
status="❌",
pipeline=pipeline_name,
config_file=config_filepath.name,
config_error_type="\n".join([er["type"] for er in error_rows]),
attribute="\n".join([er["attribute"] for er in error_rows]),
attribute="\n".join([er.get("attribute", "") for er in error_rows]),
config_error_message="\n".join([er["msg"] for er in error_rows]),
)
table.add_row(*row.model_dump().values(), style="red")