Skip to content

Commit

Permalink
Add management schema feature
Browse files Browse the repository at this point in the history
add noop and warn tests

improve tests

rename tests

add view dropping test

add unmanaged schema test

make tests more dry

Delete tmp.csv

Manage schemas is optional

Add --target-path as a CLI option. (#5402)

Include py.typed in MANIFEST.in (#5703)

This enables packages that install dbt-core from pypi to use mypy.

wip: move manage logic to separate command

Add manage command
  • Loading branch information
bneijt committed Jun 23, 2023
1 parent 5339882 commit 05fcaa2
Show file tree
Hide file tree
Showing 9 changed files with 415 additions and 5 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20220920-122529.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Added a `manage` CLI command that allows users to drop unused database relations
time: 2022-09-20T12:25:29.226182+02:00
custom:
Author: agoblet bneijt
Issue: "4957"
PR: "5392"
5 changes: 5 additions & 0 deletions core/dbt/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from dbt.contracts.project import (
Project as ProjectContract,
SemverString,
SchemaManagementConfiguration,
)
from dbt.contracts.project import PackageConfig, ProjectPackageMetadata
from dbt.contracts.publication import ProjectDependencies
Expand Down Expand Up @@ -429,6 +430,7 @@ def create_project(self, rendered: RenderComponents) -> "Project":
model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths
)

managed_schemas: List[SchemaManagementConfiguration] = value_or(cfg.managed_schemas, [])
docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
asset_paths: List[str] = value_or(cfg.asset_paths, [])
flags = get_flags()
Expand Down Expand Up @@ -503,6 +505,7 @@ def create_project(self, rendered: RenderComponents) -> "Project":
asset_paths=asset_paths,
target_path=target_path,
snapshot_paths=snapshot_paths,
managed_schemas=managed_schemas,
clean_targets=clean_targets,
log_path=log_path,
packages_install_path=packages_install_path,
Expand Down Expand Up @@ -618,6 +621,7 @@ class Project:
asset_paths: List[str]
target_path: str
snapshot_paths: List[str]
managed_schemas: List[SchemaManagementConfiguration]
clean_targets: List[str]
log_path: str
packages_install_path: str
Expand Down Expand Up @@ -695,6 +699,7 @@ def to_project_config(self, with_packages=False):
"asset-paths": self.asset_paths,
"target-path": self.target_path,
"snapshot-paths": self.snapshot_paths,
"managed-schemas": [schema.to_dict() for schema in self.managed_schemas],
"clean-targets": self.clean_targets,
"log-path": self.log_path,
"quoting": self.quoting,
Expand Down
1 change: 1 addition & 0 deletions core/dbt/config/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def from_parts(
asset_paths=project.asset_paths,
target_path=project.target_path,
snapshot_paths=project.snapshot_paths,
managed_schemas=project.managed_schemas,
clean_targets=project.clean_targets,
log_path=project.log_path,
packages_install_path=project.packages_install_path,
Expand Down
15 changes: 15 additions & 0 deletions core/dbt/contracts/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
HyphenatedDbtClassMixin,
ExtensibleDbtClassMixin,
register_pattern,
StrEnum,
)
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Union, Any
Expand Down Expand Up @@ -181,6 +182,19 @@ class RegistryPackageMetadata(
}


class PruneModelsAction(StrEnum):
    """Action to take on a relation in a managed schema that matches no model."""

    # Leave the relation untouched.
    SKIP = "skip"
    # Drop the relation from the database.
    DROP = "drop"
    # Report the relation through warn_or_error without dropping it.
    WARN = "warn"


@dataclass
class SchemaManagementConfiguration(HyphenatedDbtClassMixin, Replaceable):
    """One entry of the project's ``managed-schemas`` list."""

    # Database containing the managed schema. ManageTask falls back to the
    # database of the active target's credentials when this is unset.
    database: Optional[str] = None
    # Name of the managed schema. ManageTask falls back to the schema of the
    # active target's credentials when this is unset.
    schema: Optional[str] = None
    # What to do with relations that match no model. ManageTask treats an
    # unset value as PruneModelsAction.SKIP.
    prune_models: Optional[PruneModelsAction] = None


@dataclass
class Project(HyphenatedDbtClassMixin, Replaceable):
name: Identifier
Expand All @@ -198,6 +212,7 @@ class Project(HyphenatedDbtClassMixin, Replaceable):
asset_paths: Optional[List[str]] = None
target_path: Optional[str] = None
snapshot_paths: Optional[List[str]] = None
managed_schemas: Optional[List[SchemaManagementConfiguration]] = None
clean_targets: Optional[List[str]] = None
profile: Optional[str] = None
log_path: Optional[str] = None
Expand Down
80 changes: 80 additions & 0 deletions core/dbt/task/manage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# coding=utf-8
from typing import Dict, Set, Tuple

from .compile import CompileTask
from .runnable import ManifestTask
from dbt.exceptions import warn_or_error, ValidationException
from dbt.adapters.factory import get_adapter
from dbt.contracts.graph.parsed import (
ParsedModelNode,
)
from dbt.contracts.project import PruneModelsAction


class ManageTask(CompileTask):
    """Task behind the ``manage`` command: prune relations in managed schemas.

    For every entry in the project's ``managed_schemas`` configuration, the
    relations found in that schema are compared with the models defined in the
    project. Relations without a matching model are skipped, reported or
    dropped, depending on the entry's ``prune_models`` action.
    """

    def run(self):
        """Load the manifest, populate the adapter cache and prune the schemas."""
        ManifestTask._runtime_initialize(self)
        models_in_codebase = self.manifest.nodes.keys()
        adapter = get_adapter(self.config)

        # Everything that talks to the database stays inside the named
        # connection, including the pruning itself.
        with adapter.connection_named("master"):
            required_schemas = self.get_model_schemas(adapter, models_in_codebase)
            self.populate_adapter_cache(adapter, required_schemas)

            adapter.clear_transaction()
            self._prune_models(adapter)

    def _prune_models(self, adapter):
        """Apply each managed schema's ``prune_models`` action to unused relations.

        A relation is considered unused when no model in the manifest resolves
        to the same ``(database, schema, identifier)`` triple.

        Args:
            adapter: The adapter used to list and drop relations.

        Raises:
            ValidationException: If two entries resolve to the same schema.
        """
        managed_schemas = self.config.managed_schemas
        if not managed_schemas:
            warn_or_error("No schema's configured to manage")
            return

        # Default 'database' + 'schema' of the active target, used for entries
        # that leave either of them out.
        creds = adapter.connections.profile.credentials
        default_database, default_schema = creds.database, creds.schema

        self._assert_schema_uniqueness(default_database, default_schema)

        # Compare against the *resolved* location of every model. The values
        # under ``node.config`` are the raw user configs (None unless set
        # explicitly, and for ``schema`` only the custom suffix), which never
        # match the relations that actually exist in the database and would
        # cause live models to be reported or dropped.
        # NOTE(review): only models are considered here, so seeds or snapshots
        # living in a managed schema look unused -- confirm this is intended.
        models_in_codebase: Set[Tuple[str, str, str]] = {
            (node.database, node.schema, node.alias)
            for node in self.manifest.nodes.values()
            if isinstance(node, ParsedModelNode)
        }

        for config in managed_schemas:
            database = config.database or default_database
            schema = config.schema or default_schema
            # The action is fixed per schema entry, so resolve it once.
            action = config.prune_models or PruneModelsAction.SKIP

            relations_in_database: Dict[Tuple[str, str, str], object] = {
                (database, schema, relation.identifier): relation
                for relation in adapter.list_relations(database, schema)
            }
            if not relations_in_database:
                warn_or_error(f"No objects in managed schema '{database}.{schema}'")

            unused = relations_in_database.keys() - models_in_codebase

            # Sorted so warnings and drops happen in a deterministic order.
            for key in sorted(unused):
                target_database, target_schema, target_identifier = key
                if action == PruneModelsAction.WARN:
                    warn_or_error(
                        f"Found unused model {target_database}.{target_schema}.{target_identifier}"
                    )
                elif action == PruneModelsAction.DROP:
                    adapter.drop_relation(relations_in_database[key])

    def _assert_schema_uniqueness(self, default_database=None, default_schema=None):
        """Ensure that no schema is listed twice in ``managed_schemas``.

        Entries are compared after filling in the defaults, so an entry that
        omits the database or schema collides with one that spells out the
        same default explicitly.

        Args:
            default_database: Database assumed for entries without one.
            default_schema: Schema assumed for entries without one.

        Raises:
            ValidationException: If two entries resolve to the same schema.
        """
        seen = set()

        for config in self.config.managed_schemas:
            resolved = (
                config.database or default_database,
                config.schema or default_schema,
            )
            if resolved in seen:
                raise ValidationException(f"Duplicate schema found: {resolved}")
            seen.add(resolved)

    def interpret_results(self, results):
        """Always report success, whatever ``results`` contains."""
        return True
10 changes: 5 additions & 5 deletions core/dbt/task/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
from dbt.contracts.graph.nodes import HookNode, ResultNode
from dbt.contracts.results import NodeStatus, RunResult, RunStatus, RunningStatus, BaseResult
from dbt.exceptions import (
CompilationError,
DbtInternalError,
MissingMaterializationError,
DbtRuntimeError,
DbtValidationError,
CompilationException,
InternalException,
RuntimeException,
ValidationException,
missing_materialization,
)
from dbt.events.functions import fire_event, get_invocation_id
from dbt.events.types import (
Expand Down
5 changes: 5 additions & 0 deletions core/dbt/tests/fixtures/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,11 @@ def get_tables_in_schema(self):
result = self.run_sql(sql, fetch="all")
return {model_name: materialization for (model_name, materialization) in result}

def update_models(self, models: dict):
    """Replace the contents of the test project's ``models`` directory."""
    models_dir = self.project_root.join("models")
    # Remove the existing directory before the new files are written.
    models_dir.remove()
    write_project_files(self.project_root, "models", models)


# This is the main fixture that is used in all functional tests. It pulls in the other
# fixtures that are necessary to set up a dbt project, and saves some of the information
Expand Down
1 change: 1 addition & 0 deletions tests/functional/schema_management/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Test schema management as introduced by https://github.com/dbt-labs/dbt-core/issues/4957
Loading

0 comments on commit 05fcaa2

Please sign in to comment.