Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the arg dbt_project_path in the ProjectConfig optional #581

Merged
merged 8 commits into from
Oct 13, 2023
57 changes: 40 additions & 17 deletions cosmos/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,39 +46,67 @@ class ProjectConfig:
"""
Class for setting project config.

:param dbt_project_path: The path to the dbt project directory. Example: /path/to/dbt/project
:param dbt_project_path: The path to the dbt project directory. Example: /path/to/dbt/project. Defaults to None
:param models_relative_path: The relative path to the dbt models directory within the project. Defaults to models
:param seeds_relative_path: The relative path to the dbt seeds directory within the project. Defaults to seeds
:param snapshots_relative_path: The relative path to the dbt snapshots directory within the project. Defaults to
snapshots
:param manifest_path: The absolute path to the dbt manifest file. Defaults to None
:param project_name: Allows the user to define the project name.
Required if dbt_project_path is not defined. Defaults to the folder name of dbt_project_path.
"""

dbt_project_path: str | Path
dbt_project_path: str | Path | None = None
models_relative_path: str | Path = "models"
seeds_relative_path: str | Path = "seeds"
snapshots_relative_path: str | Path = "snapshots"
manifest_path: str | Path | None = None

parsed_dbt_project_path: Path | None = None
tatiana marked this conversation as resolved.
Show resolved Hide resolved
parsed_manifest_path: Path | None = None

project_name: str | None = None

def __post_init__(self) -> None:
"Converts paths to `Path` objects."
self.dbt_project_path = Path(self.dbt_project_path)
self.models_relative_path = self.dbt_project_path / Path(self.models_relative_path)
self.seeds_relative_path = self.dbt_project_path / Path(self.seeds_relative_path)
self.snapshots_relative_path = self.dbt_project_path / Path(self.snapshots_relative_path)
if self.dbt_project_path:
self.parsed_dbt_project_path = Path(self.dbt_project_path)
self.models_relative_path = self.parsed_dbt_project_path / Path(self.models_relative_path)
self.seeds_relative_path = self.parsed_dbt_project_path / Path(self.seeds_relative_path)
self.snapshots_relative_path = self.parsed_dbt_project_path / Path(self.snapshots_relative_path)
if not self.project_name:
self.project_name = self.parsed_dbt_project_path.stem

if self.manifest_path:
self.parsed_manifest_path = Path(self.manifest_path)

def validate_project(self) -> None:
"Validates that the project, models, and seeds directories exist."
project_yml_path = Path(self.dbt_project_path) / "dbt_project.yml"
mandatory_paths = {
"dbt_project.yml": project_yml_path,
"models directory ": self.models_relative_path,
}
"""
Validates necessary context is present for a project.
There are two cases we need to account for:
1 - the entire dbt project
2 - the dbt manifest
If the project path is provided, we assume scenario 1.
If the project path is not provided, we assume scenario 2.
"""

mandatory_paths = {}

if self.parsed_dbt_project_path:
project_yml_path = self.parsed_dbt_project_path / "dbt_project.yml"
mandatory_paths = {
"dbt_project.yml": project_yml_path,
"models directory ": self.models_relative_path,
}
elif self.parsed_manifest_path:
if not self.project_name:
raise CosmosValueError(
"project_name required when manifest_path is present and dbt_project_path is not."
)
mandatory_paths = {"manifest file": self.parsed_manifest_path}
else:
raise CosmosValueError("dbt_project_path or manifest_path are required parameters.")

for name, path in mandatory_paths.items():
if path is None or not Path(path).exists():
raise CosmosValueError(f"Could not find {name} at {path}")
Expand All @@ -92,11 +120,6 @@ def is_manifest_available(self) -> bool:

return self.parsed_manifest_path.exists()

@property
def project_name(self) -> str:
"The name of the dbt project."
return Path(self.dbt_project_path).stem


@dataclass
class ProfileConfig:
Expand Down
46 changes: 43 additions & 3 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,65 @@
# Tests that a ProjectConfig object can be created with valid parameters
def test_valid_parameters():
project_config = ProjectConfig(dbt_project_path="path/to/dbt/project")
assert project_config.dbt_project_path == Path("path/to/dbt/project")
assert project_config.parsed_dbt_project_path == Path("path/to/dbt/project")
assert project_config.models_relative_path == Path("path/to/dbt/project/models")
assert project_config.seeds_relative_path == Path("path/to/dbt/project/seeds")
assert project_config.snapshots_relative_path == Path("path/to/dbt/project/snapshots")
assert project_config.manifest_path is None


def test_init_with_manifest():
# Since dbt_project_path is now an optional parameter, we should test each combination for init and validation


# Passing a manifest AND a project together should still succeed, as it did previously
tatiana marked this conversation as resolved.
Show resolved Hide resolved
def test_init_with_manifest_and_project():
project_config = ProjectConfig(dbt_project_path="/tmp/some-path", manifest_path="target/manifest.json")
assert project_config.parsed_manifest_path == Path("target/manifest.json")


def test_validate_project_succeeds():
# Since dbt_project_path is optional, we should be able to operate with only a manifest
def test_init_with_manifest_and_not_project():
project_config = ProjectConfig(manifest_path="target/manifest.json")
assert project_config.parsed_manifest_path == Path("target/manifest.json")


# Supplying both project and manifest paths should still be permitted, as it was previously
def test_validate_project_success_project_and_manifest():
    """Validation passes when both a project directory and a manifest path are given."""
    manifest_file = DBT_PROJECTS_ROOT_DIR / "manifest.json"
    config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR, manifest_path=manifest_file)
    outcome = config.validate_project()
    assert outcome is None


# with updated logic, passing a project alone should be permitted
def test_validate_project_success_project_and_not_manifest():
    """Validation passes when only the project directory is given."""
    config = ProjectConfig(dbt_project_path=DBT_PROJECTS_ROOT_DIR)
    outcome = config.validate_project()
    assert outcome is None


# with updated logic, passing a manifest alone should fail since we also require a project_name
def test_validate_project_failure_not_project_and_manifest():
    """Validation rejects a manifest-only config that has no ``project_name``."""
    project_config = ProjectConfig(manifest_path=DBT_PROJECTS_ROOT_DIR / "manifest.json")
    # The call itself is expected to raise, so there is no return value to
    # assert on; wrapping it in ``assert ... is None`` would never be evaluated.
    with pytest.raises(CosmosValueError) as err_info:
        project_config.validate_project()
    assert err_info.value.args[0] == "project_name required when manifest_path is present and dbt_project_path is not."


# with updated logic, passing a manifest and project name together should succeed.
def test_validate_project_success_not_project_and_manifest():
    """Validation passes for a manifest-only config when a project name is given."""
    manifest_file = DBT_PROJECTS_ROOT_DIR / "manifest.json"
    config = ProjectConfig(manifest_path=manifest_file, project_name="test-project")
    outcome = config.validate_project()
    assert outcome is None


# with updated logic, passing no manifest and no project directory should fail.
def test_validate_project_fail_none():
    """Validation rejects a config that has neither a project path nor a manifest path."""
    project_config = ProjectConfig()
    # The call itself is expected to raise, so there is no return value to
    # assert on; wrapping it in ``assert ... is None`` would never be evaluated.
    with pytest.raises(CosmosValueError) as err_info:
        project_config.validate_project()
    assert err_info.value.args[0] == "dbt_project_path or manifest_path are required parameters."


def test_validate_project_fails():
project_config = ProjectConfig(dbt_project_path=Path("/tmp"))
with pytest.raises(CosmosValueError) as err_info:
Expand Down
Loading