From f6538082eef63029ca5ac5f9dce241dd9a67086f Mon Sep 17 00:00:00 2001 From: tabmra Date: Mon, 9 Oct 2023 15:56:40 +0800 Subject: [PATCH 1/5] make dbt_project_path optional, allow project_name to be user-defined --- cosmos/config.py | 57 ++++++++++++++++++++++++++++++-------------- tests/test_config.py | 39 +++++++++++++++++++++++++++--- 2 files changed, 75 insertions(+), 21 deletions(-) diff --git a/cosmos/config.py b/cosmos/config.py index c61a5a712..c2b2c736f 100644 --- a/cosmos/config.py +++ b/cosmos/config.py @@ -46,39 +46,66 @@ class ProjectConfig: """ Class for setting project config. - :param dbt_project_path: The path to the dbt project directory. Example: /path/to/dbt/project + :param dbt_project_path: The path to the dbt project directory. Example: /path/to/dbt/project. Defaults to None :param models_relative_path: The relative path to the dbt models directory within the project. Defaults to models :param seeds_relative_path: The relative path to the dbt seeds directory within the project. Defaults to seeds :param snapshots_relative_path: The relative path to the dbt snapshots directory within the project. Defaults to snapshots :param manifest_path: The absolute path to the dbt manifest file. Defaults to None + :param project_name: Allows the user to define the project name. + Required if dbt_project_path is not defined. Defaults to the folder name of dbt_project_path. """ - dbt_project_path: str | Path + dbt_project_path: str | Path | None = None models_relative_path: str | Path = "models" seeds_relative_path: str | Path = "seeds" snapshots_relative_path: str | Path = "snapshots" manifest_path: str | Path | None = None + parsed_dbt_project_path: Path | None = None parsed_manifest_path: Path | None = None + project_name: str | None = None + def __post_init__(self) -> None: "Converts paths to `Path` objects." - self.dbt_project_path = Path(self.dbt_project_path) - self.models_relative_path = self.dbt_project_path / Path(self.models_relative_path) - self.seeds_relative_path = self.dbt_project_path / Path(self.seeds_relative_path) - self.snapshots_relative_path = self.dbt_project_path / Path(self.snapshots_relative_path) + if self.dbt_project_path: + self.parsed_dbt_project_path = Path(self.dbt_project_path) + self.models_relative_path = self.parsed_dbt_project_path / Path(self.models_relative_path) + self.seeds_relative_path = self.parsed_dbt_project_path / Path(self.seeds_relative_path) + self.snapshots_relative_path = self.parsed_dbt_project_path / Path(self.snapshots_relative_path) + if not self.project_name: + self.project_name = self.parsed_dbt_project_path.stem if self.manifest_path: self.parsed_manifest_path = Path(self.manifest_path) def validate_project(self) -> None: - "Validates that the project, models, and seeds directories exist." - project_yml_path = Path(self.dbt_project_path) / "dbt_project.yml" - mandatory_paths = { - "dbt_project.yml": project_yml_path, - "models directory ": self.models_relative_path, - } + """ + Validates necessary context is present for a project. + There are 2 cases we need to account for + 1 - the entire dbt project + 2 - the dbt manifest + Here, we can assume if the project path is provided, we have scenario 1. If the project path is not provided, we have a scenario 2 + """ + + mandatory_paths = {} + + if self.parsed_dbt_project_path: + project_yml_path = self.parsed_dbt_project_path / "dbt_project.yml" + mandatory_paths = { + "dbt_project.yml": project_yml_path, + "models directory ": self.models_relative_path, + } + elif self.parsed_manifest_path: + if not self.project_name: + raise CosmosValueError(f"A required project field was not present - project_name must be provided when manifest_path is provided and dbt_project_path is not.") + mandatory_paths = { + "manifest file": self.parsed_manifest_path + } + else: + raise CosmosValueError("A required project field was not present - the user must provide either dbt_project_path or manifest_path") + for name, path in mandatory_paths.items(): if path is None or not Path(path).exists(): raise CosmosValueError(f"Could not find {name} at {path}") @@ -92,12 +119,6 @@ def is_manifest_available(self) -> bool: return self.parsed_manifest_path.exists() - @property - def project_name(self) -> str: - "The name of the dbt project." - return Path(self.dbt_project_path).stem - - @dataclass class ProfileConfig: """ diff --git a/tests/test_config.py b/tests/test_config.py index ee96b03c3..d6377e4dc 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -13,24 +13,57 @@ # Tests that a ProjectConfig object can be created with valid parameters def test_valid_parameters(): project_config = ProjectConfig(dbt_project_path="path/to/dbt/project") - assert project_config.dbt_project_path == Path("path/to/dbt/project") + assert project_config.parsed_dbt_project_path == Path("path/to/dbt/project") assert project_config.models_relative_path == Path("path/to/dbt/project/models") assert project_config.seeds_relative_path == Path("path/to/dbt/project/seeds") assert project_config.snapshots_relative_path == Path("path/to/dbt/project/snapshots") assert project_config.manifest_path is None -def test_init_with_manifest(): +# Since dbt_project_path is now an optional parameter, we should test each combination for init and validation + +# Passing a manifest AND project together should succeed, as previous +def test_init_with_manifest_and_project(): project_config = ProjectConfig(dbt_project_path="/tmp/some-path", manifest_path="target/manifest.json") assert project_config.parsed_manifest_path == Path("target/manifest.json") +# Since dbt_project_path is optional, we should be able to operate with only a manifest +def test_init_with_manifest_and_not_project(): + project_config = ProjectConfig(manifest_path="target/manifest.json") + assert project_config.parsed_manifest_path == Path("target/manifest.json") -def test_validate_project_succeeds(): +# supplying both project and manifest paths as previous should be permitted +def test_validate_project_success_project_and_manifest(): project_config = ProjectConfig( dbt_project_path=DBT_PROJECTS_ROOT_DIR, manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json" ) assert project_config.validate_project() is None +# with updated logic, passing a project alone should be permitted +def test_validate_project_success_project_and_not_manifest(): + project_config = ProjectConfig( + dbt_project_path=DBT_PROJECTS_ROOT_DIR + ) + assert project_config.validate_project() is None + +# with updated logic, passing a manifest alone should fail since we also require a project_name +def test_validate_project_failure_not_project_and_manifest(): + project_config = ProjectConfig( + manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json" + ) + with pytest.raises(CosmosValueError) as err_info: + assert project_config.validate_project() is None + print(err_info.value.args[0]) + assert err_info.value.args[0] == "A required project field was not present - project_name must be provided when manifest_path is provided and dbt_project_path is not." + +# with updated logic, passing a manifest and project name together should succeed. +def test_validate_project_success_not_project_and_manifest(): + project_config = ProjectConfig( + manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json", + project_name="test-project" + ) + assert project_config.validate_project() is None + def test_validate_project_fails(): project_config = ProjectConfig(dbt_project_path=Path("/tmp")) From 1a51b5e5142082993a9361e61e71f1d22d367aa1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Oct 2023 08:57:24 +0000 Subject: [PATCH 2/5] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20for?= =?UTF-8?q?mat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cosmos/config.py | 13 ++++++++----- tests/test_config.py | 24 +++++++++++++----------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/cosmos/config.py b/cosmos/config.py index c2b2c736f..b8e27ae78 100644 --- a/cosmos/config.py +++ b/cosmos/config.py @@ -99,12 +99,14 @@ def validate_project(self) -> None: } elif self.parsed_manifest_path: if not self.project_name: - raise CosmosValueError(f"A required project field was not present - project_name must be provided when manifest_path is provided and dbt_project_path is not.") - mandatory_paths = { - "manifest file": self.parsed_manifest_path - } + raise CosmosValueError( + "A required project field was not present - project_name must be provided when manifest_path is provided and dbt_project_path is not." + ) + mandatory_paths = {"manifest file": self.parsed_manifest_path} else: - raise CosmosValueError("A required project field was not present - the user must provide either dbt_project_path or manifest_path") + raise CosmosValueError( + "A required project field was not present - the user must provide either dbt_project_path or manifest_path" + ) for name, path in mandatory_paths.items(): if path is None or not Path(path).exists(): @@ -119,6 +121,7 @@ def is_manifest_available(self) -> bool: return self.parsed_manifest_path.exists() + @dataclass class ProfileConfig: """ diff --git a/tests/test_config.py b/tests/test_config.py index d6377e4dc..6fb6f84ef 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -22,16 +22,19 @@ def test_valid_parameters(): # Since dbt_project_path is now an optional parameter, we should test each combination for init and validation + # Passing a manifest AND project together should succeed, as previous def test_init_with_manifest_and_project(): project_config = ProjectConfig(dbt_project_path="/tmp/some-path", manifest_path="target/manifest.json") assert project_config.parsed_manifest_path == Path("target/manifest.json") + # Since dbt_project_path is optional, we should be able to operate with only a manifest def test_init_with_manifest_and_not_project(): project_config = ProjectConfig(manifest_path="target/manifest.json") assert project_config.parsed_manifest_path == Path("target/manifest.json") + # supplying both project and manifest paths as previous should be permitted def test_validate_project_success_project_and_manifest(): project_config = ProjectConfig( @@ -39,29 +42,28 @@ def test_validate_project_success_project_and_manifest(): ) assert project_config.validate_project() is None + # with updated logic, passing a project alone should be permitted def test_validate_project_success_project_and_not_manifest(): - project_config = ProjectConfig( - dbt_project_path=DBT_PROJECTS_ROOT_DIR - ) + project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR) assert project_config.validate_project() is None + # with updated logic, passing a manifest alone should fail since we also require a project_name def test_validate_project_failure_not_project_and_manifest(): - project_config = ProjectConfig( - manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json" - ) + project_config = ProjectConfig(manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json") with pytest.raises(CosmosValueError) as err_info: assert project_config.validate_project() is None print(err_info.value.args[0]) - assert err_info.value.args[0] == "A required project field was not present - project_name must be provided when manifest_path is provided and dbt_project_path is not." + assert ( + err_info.value.args[0] + == "A required project field was not present - project_name must be provided when manifest_path is provided and dbt_project_path is not." + ) + # with updated logic, passing a manifest and project name together should succeed. def test_validate_project_success_not_project_and_manifest(): - project_config = ProjectConfig( - manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json", - project_name="test-project" - ) + project_config = ProjectConfig(manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json", project_name="test-project") assert project_config.validate_project() is None From 9b53c5367eeb5af2d28d381dece6a3549c6a657b Mon Sep 17 00:00:00 2001 From: tabmra Date: Tue, 10 Oct 2023 15:48:44 +0800 Subject: [PATCH 3/5] added additional test for coverage. Simplified Error Messages --- cosmos/config.py | 9 ++++----- tests/test_config.py | 13 +++++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cosmos/config.py b/cosmos/config.py index b8e27ae78..c5fe9f7d7 100644 --- a/cosmos/config.py +++ b/cosmos/config.py @@ -86,7 +86,8 @@ def validate_project(self) -> None: There are 2 cases we need to account for 1 - the entire dbt project 2 - the dbt manifest - Here, we can assume if the project path is provided, we have scenario 1. If the project path is not provided, we have a scenario 2 + Here, we can assume if the project path is provided, we have scenario 1. + If the project path is not provided, we have a scenario 2 """ mandatory_paths = {} @@ -100,13 +101,11 @@ def validate_project(self) -> None: elif self.parsed_manifest_path: if not self.project_name: raise CosmosValueError( - "A required project field was not present - project_name must be provided when manifest_path is provided and dbt_project_path is not." + "project_name required when manifest_path is present and dbt_project_path is not." ) mandatory_paths = {"manifest file": self.parsed_manifest_path} else: - raise CosmosValueError( - "A required project field was not present - the user must provide either dbt_project_path or manifest_path" - ) + raise CosmosValueError("dbt_project_path or manifest_path are required parameters.") for name, path in mandatory_paths.items(): if path is None or not Path(path).exists(): diff --git a/tests/test_config.py b/tests/test_config.py index 6fb6f84ef..ad1c8c531 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -55,10 +55,7 @@ def test_validate_project_failure_not_project_and_manifest(): with pytest.raises(CosmosValueError) as err_info: assert project_config.validate_project() is None print(err_info.value.args[0]) - assert ( - err_info.value.args[0] - == "A required project field was not present - project_name must be provided when manifest_path is provided and dbt_project_path is not." - ) + assert err_info.value.args[0] == "project_name required when manifest_path is present and dbt_project_path is not." # with updated logic, passing a manifest and project name together should succeed. @@ -67,6 +64,14 @@ def test_validate_project_success_not_project_and_manifest(): assert project_config.validate_project() is None +# with updated logic, passing no manifest and no project directory should fail. +def test_validate_project_fail_none(): + project_config = ProjectConfig() + with pytest.raises(CosmosValueError) as err_info: + assert project_config.validate_project() is None + assert err_info.value.args[0] == "dbt_project_path or manifest_path are required parameters." + + def test_validate_project_fails(): project_config = ProjectConfig(dbt_project_path=Path("/tmp")) with pytest.raises(CosmosValueError) as err_info: From 56aff394148c3c6d0ccba04a0b77cbd43cbe924d Mon Sep 17 00:00:00 2001 From: tabmra Date: Wed, 11 Oct 2023 10:31:44 +0800 Subject: [PATCH 4/5] Updated test names, docstrings. Updated cosmos.config parsed fields to properties. Fixed spelling --- cosmos/config.py | 18 +++++++++------- tests/test_config.py | 51 +++++++++++++++++++++++++++----------------- 2 files changed, 42 insertions(+), 27 deletions(-) diff --git a/cosmos/config.py b/cosmos/config.py index c5fe9f7d7..cc2255bc8 100644 --- a/cosmos/config.py +++ b/cosmos/config.py @@ -5,9 +5,11 @@ import contextlib import tempfile from dataclasses import dataclass, field +from functools import cached_property from pathlib import Path from typing import Iterator + from cosmos.constants import TestBehavior, ExecutionMode, LoadMode from cosmos.dbt.executable import get_system_dbt from cosmos.exceptions import CosmosValueError @@ -61,25 +63,25 @@ class ProjectConfig: seeds_relative_path: str | Path = "seeds" snapshots_relative_path: str | Path = "snapshots" manifest_path: str | Path | None = None + project_name: str | None = None - parsed_dbt_project_path: Path | None = None - parsed_manifest_path: Path | None = None + @cached_property + def parsed_dbt_project_path(self) -> Path | None: + return Path(self.dbt_project_path) if self.dbt_project_path else None - project_name: str | None = None + @cached_property + def parsed_manifest_path(self) -> Path | None: + return Path(self.manifest_path) if self.manifest_path else None def __post_init__(self) -> None: "Converts paths to `Path` objects." - if self.dbt_project_path: - self.parsed_dbt_project_path = Path(self.dbt_project_path) + if self.parsed_dbt_project_path: self.models_relative_path = self.parsed_dbt_project_path / Path(self.models_relative_path) self.seeds_relative_path = self.parsed_dbt_project_path / Path(self.seeds_relative_path) self.snapshots_relative_path = self.parsed_dbt_project_path / Path(self.snapshots_relative_path) if not self.project_name: self.project_name = self.parsed_dbt_project_path.stem - if self.manifest_path: - self.parsed_manifest_path = Path(self.manifest_path) - def validate_project(self) -> None: """ Validates necessary context is present for a project. diff --git a/tests/test_config.py b/tests/test_config.py index ad1c8c531..9dd936fba 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -10,7 +10,6 @@ PIPELINE_FOLDER = "jaffle_shop" -# Tests that a ProjectConfig object can be created with valid parameters def test_valid_parameters(): project_config = ProjectConfig(dbt_project_path="path/to/dbt/project") assert project_config.parsed_dbt_project_path == Path("path/to/dbt/project") @@ -20,37 +19,44 @@ def test_valid_parameters(): assert project_config.manifest_path is None -# Since dbt_project_path is now an optional parameter, we should test each combination for init and validation - - -# Passing a manifest AND project together should succeed, as previous -def test_init_with_manifest_and_project(): +def test_init_with_manifest_path_and_project_path_succeeds(): + """ + Passing a manifest path AND project path together should succeed, as previous + """ project_config = ProjectConfig(dbt_project_path="/tmp/some-path", manifest_path="target/manifest.json") assert project_config.parsed_manifest_path == Path("target/manifest.json") -# Since dbt_project_path is optional, we should be able to operate with only a manifest -def test_init_with_manifest_and_not_project(): +def test_init_with_manifest_path_and_not_project_path_succeeds(): + """ + Since dbt_project_path is optional, we should be able to operate with only a manifest + """ project_config = ProjectConfig(manifest_path="target/manifest.json") assert project_config.parsed_manifest_path == Path("target/manifest.json") -# supplying both project and manifest paths as previous should be permitted -def test_validate_project_success_project_and_manifest(): +def test_validate_with_project_path_and_manifest_path_succeeds(): + """ + Supplying both project and manifest paths as previous should be permitted + """ project_config = ProjectConfig( dbt_project_path=DBT_PROJECTS_ROOT_DIR, manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json" ) assert project_config.validate_project() is None -# with updated logic, passing a project alone should be permitted -def test_validate_project_success_project_and_not_manifest(): +def test_validate_with_project_path_and_not_manifest_path_succeeds(): + """ + Passing a project with no manifest should be permitted + """ project_config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR) assert project_config.validate_project() is None -# with updated logic, passing a manifest alone should fail since we also require a project_name -def test_validate_project_failure_not_project_and_manifest(): +def test_validate_with_manifest_path_and_not_project_path_and_not_project_name_fails(): + """ + Passing a manifest alone should fail since we also require a project_name + """ project_config = ProjectConfig(manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json") with pytest.raises(CosmosValueError) as err_info: assert project_config.validate_project() is None @@ -58,21 +64,28 @@ def test_validate_project_failure_not_project_and_manifest(): assert err_info.value.args[0] == "project_name required when manifest_path is present and dbt_project_path is not." -# with updated logic, passing a manifest and project name together should succeed. -def test_validate_project_success_not_project_and_manifest(): +def test_validate_with_manifest_path_and_project_name_and_not_project_path_succeeds(): + """ + Passing a manifest and project name together should succeed. + """ project_config = ProjectConfig(manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json", project_name="test-project") assert project_config.validate_project() is None -# with updated logic, passing no manifest and no project directory should fail. -def test_validate_project_fail_none(): +def test_validate_no_paths_fails(): + """ + Passing no manifest and no project directory should fail. + """ project_config = ProjectConfig() with pytest.raises(CosmosValueError) as err_info: assert project_config.validate_project() is None assert err_info.value.args[0] == "dbt_project_path or manifest_path are required parameters." -def test_validate_project_fails(): +def test_validate_project_missing_fails(): + """ + Passing a project dir that does not exist where specified should fail + """ project_config = ProjectConfig(dbt_project_path=Path("/tmp")) with pytest.raises(CosmosValueError) as err_info: assert project_config.validate_project() is None From fc42acd76a2102908e2581223b811b6e1deac92b Mon Sep 17 00:00:00 2001 From: tabmra Date: Wed, 11 Oct 2023 13:10:43 +0800 Subject: [PATCH 5/5] Updated docs for project-config to reflect optional parameter --- docs/configuration/project-config.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/configuration/project-config.rst b/docs/configuration/project-config.rst index 9f00930ba..c1d952f6e 100644 --- a/docs/configuration/project-config.rst +++ b/docs/configuration/project-config.rst @@ -4,7 +4,7 @@ Project Config The ``cosmos.config.ProjectConfig`` allows you to specify information about where your dbt project is located. It takes the following arguments: -- ``dbt_project_path`` (required): The full path to your dbt project. This directory should have a ``dbt_project.yml`` file +- ``dbt_project_path``: The full path to your dbt project. This directory should have a ``dbt_project.yml`` file - ``models_relative_path``: The path to your models directory, relative to the ``dbt_project_path``. This defaults to ``models/`` - ``seeds_relative_path``: The path to your seeds directory, relative to the ``dbt_project_path``. This defaults to @@ -13,6 +13,9 @@ takes the following arguments: to ``snapshots/`` - ``manifest_path``: The absolute path to your manifests directory. This is only required if you're using Cosmos' manifest parsing mode +- ``project_name`` : The name of the project. If ``dbt_project_path`` is provided, the ``project_name`` defaults to the + folder name containing ``dbt_project.yml``. If ``dbt_project_path`` is not provided, and ``manifest_path`` is provided, + ``project_name`` is required as the name can not be inferred from ``dbt_project_path`` Project Config Example