From 847320e65978d5770cf2e338a35ff1242b68e26b Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 26 Sep 2023 13:12:27 -0700
Subject: [PATCH] persist view column comments (#893)

* persist view column comments

* format: whitespace

* extracted get_matched_column macro

* move parenthesis to the calling macro

* changelog

* fix: remove matching column in different case

* fix: remove get_matched_column macro - not much logic left there.

* escape column comments and add functional test

* Update Features-20230817-130731.yaml

* remove unneeded f string

* add test fixture to view test

* fix fixtures for TestPersistDocsDeltaView

* fix fixtures for TestPersistDocsDeltaView

* formatting

* fix tests

---------

Co-authored-by: Juri Krainjukov
(cherry picked from commit 1672efd4fff26f884decec22b8f02924e7a91976)
---
 .../unreleased/Features-20230817-130731.yaml  |  6 ++
 dbt/include/spark/macros/adapters.sql         | 33 +++++++++++
 .../adapter/persist_docs/fixtures.py          | 30 ++++++++++
 .../adapter/persist_docs/test_persist_docs.py | 55 +++++++++++++++++++
 4 files changed, 124 insertions(+)
 create mode 100644 .changes/unreleased/Features-20230817-130731.yaml

diff --git a/.changes/unreleased/Features-20230817-130731.yaml b/.changes/unreleased/Features-20230817-130731.yaml
new file mode 100644
index 000000000..e88deb7bd
--- /dev/null
+++ b/.changes/unreleased/Features-20230817-130731.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Persist Column level comments when creating views
+time: 2023-08-17T13:07:31.6812862Z
+custom:
+  Author: jurasan
+  Issue: 372
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 202564e4e..c9b32db2f 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -223,9 +223,42 @@
   {% endfor %}
 {% endmacro %}
 
+{#
+  Render one entry of a view's column list: the quoted column name, followed by
+  a comment clause when column_dict holds a non-empty description for the column.
+#}
+{% macro get_column_comment_sql(column_name, column_dict) -%}
+  {# Start with no clause so the output below never reads an unset variable. #}
+  {% set column_comment_clause = "" %}
+  {% if column_name in column_dict and column_dict[column_name]["description"] -%}
+    {# Escape backslashes before quotes so the added quote escapes are not doubled. #}
+    {% set escaped_description = column_dict[column_name]["description"] | replace("\\", "\\\\") | replace("'", "\\'") %}
+    {% set column_comment_clause = "comment '" ~ escaped_description ~ "'" %}
+  {%- endif -%}
+  {{ adapter.quote(column_name) }} {{ column_comment_clause }}
+{% endmacro %}
+
+{#
+  Render the comma-separated column list of a view: one entry per name in
+  query_columns, with descriptions looked up in model_columns.
+#}
+{% macro get_persist_docs_column_list(model_columns, query_columns) %}
+  {% for column_name in query_columns %}
+    {{ get_column_comment_sql(column_name, model_columns) }}
+    {{- ", " if not loop.last else "" }}
+  {% endfor %}
+{% endmacro %}
 
 {% macro spark__create_view_as(relation, sql) -%}
   create or replace view {{ relation }}
+  {# An explicit column list lets each view column carry its persisted comment. #}
+  {% if config.persist_column_docs() -%}
+    {% set model_columns = model.columns %}
+    {% set query_columns = get_columns_in_query(sql) %}
+    (
+    {{ get_persist_docs_column_list(model_columns, query_columns) }}
+    )
+  {% endif %}
   {{ comment_clause() }}
   {%- set contract_config = config.get('contract') -%}
   {%- if contract_config.enforced -%}
diff --git a/tests/functional/adapter/persist_docs/fixtures.py b/tests/functional/adapter/persist_docs/fixtures.py
index 3c351ab55..b884b7dec 100644
--- a/tests/functional/adapter/persist_docs/fixtures.py
+++ b/tests/functional/adapter/persist_docs/fixtures.py
@@ -21,11 +21,41 @@
 select 1 as id, 'Joe' as name
 """
 
+# View model selecting from table_delta_model; it exposes the columns "id" and "count".
+_MODELS__VIEW_DELTA_MODEL = """
+{{ config(materialized='view') }}
+select id, count(*) as count from {{ ref('table_delta_model') }} group by id
+"""
+
 _MODELS__TABLE_DELTA_MODEL_MISSING_COLUMN = """
 {{ config(materialized='table', file_format='delta') }}
 select 1 as id, 'Joe' as different_name
 """
 
+# Properties for view_delta_model: only "id" is described; the text mixes quotes and SQL comment markers.
+_VIEW_PROPERTIES_MODELS = """
+version: 2
+models:
+  - name: view_delta_model
+    description: |
+      View model description "with double quotes"
+      and with 'single quotes' as welll as other;
+      '''abc123'''
+      reserved -- characters
+      --
+      /* comment */
+      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+    columns:
+      - name: id
+        description: |
+          id Column description "with double quotes"
+          and with 'single quotes' as welll as other;
+          '''abc123'''
+          reserved -- characters
+          --
+          /* comment */
+          Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+"""
 
 _PROPERTIES__MODELS = """
 version: 2
diff --git a/tests/functional/adapter/persist_docs/test_persist_docs.py b/tests/functional/adapter/persist_docs/test_persist_docs.py
index 0e3d102dc..ee02e5ef8 100644
--- a/tests/functional/adapter/persist_docs/test_persist_docs.py
+++ b/tests/functional/adapter/persist_docs/test_persist_docs.py
@@ -10,6 +10,8 @@
     _PROPERTIES__MODELS,
     _PROPERTIES__SEEDS,
     _SEEDS__BASIC,
+    _MODELS__VIEW_DELTA_MODEL,
+    _VIEW_PROPERTIES_MODELS,
 )
 
 
@@ -76,6 +78,59 @@ def test_delta_comments(self, project):
                 assert result[2].startswith("Some stuff here and then a call to")
 
 
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestPersistDocsDeltaView:
+    """Check that relation and column docs are persisted as comments on a view."""
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        """Provide the source table, the view built on it, and the view's properties."""
+        return {
+            "table_delta_model.sql": _MODELS__TABLE_DELTA_MODEL,
+            "view_delta_model.sql": _MODELS__VIEW_DELTA_MODEL,
+            "schema.yml": _VIEW_PROPERTIES_MODELS,
+        }
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        """Enable persist_docs for both relations and columns."""
+        return {
+            "models": {
+                "test": {
+                    "+persist_docs": {
+                        "relation": True,
+                        "columns": True,
+                    },
+                }
+            },
+        }
+
+    def test_delta_comments(self, project):
+        """Run the project and inspect the comments reported for the view."""
+        run_dbt(["run"])
+
+        results = project.run_sql(
+            "describe extended {schema}.{table}".format(
+                schema=project.test_schema, table="view_delta_model"
+            ),
+            fetch="all",
+        )
+
+        seen = set()
+        for result in results:
+            seen.add(result[0])
+            if result[0] == "Comment":
+                assert result[1].startswith("View model description")
+            if result[0] == "id":
+                assert result[2].startswith("id Column description")
+            if result[0] == "count":
+                assert result[2] is None
+
+        # Without this check the loop passes vacuously when an expected row is absent.
+        missing = {"Comment", "id", "count"} - seen
+        assert not missing, f"describe extended returned no row for {sorted(missing)}"
+
+
 @pytest.mark.skip_profile("apache_spark", "spark_session")
 class TestPersistDocsMissingColumn:
     @pytest.fixture(scope="class")