Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add exports config to YAML spec for saved queries #190

Merged
merged 10 commits into from
Oct 25, 2023
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20231024-162842.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add exports configuration to YAML spec.
time: 2023-10-24T16:28:42.013032-07:00
custom:
Author: courtneyholcomb
Issue: "189"
2 changes: 1 addition & 1 deletion .github/workflows/ci-schema-consistency.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
python-version: "3.9"

- name: Generate JSON Schema
run: hatch run dev-env:python dbt_semantic_interfaces/parsing/generate_json_schema_file.py
run: make json_schema

- name: Schema Consistency Check
run: |
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ test:

lint:
hatch run dev-env:pre-commit run --show-diff-on-failure --color=always --all-files

json_schema:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This make target was mentioned in CI, but it didn't actually exist in the repo so I added it.

hatch run dev-env:python dbt_semantic_interfaces/parsing/generate_json_schema_file.py
40 changes: 40 additions & 0 deletions dbt_semantic_interfaces/implementations/export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from __future__ import annotations

from typing import Optional

from pydantic import Field
from typing_extensions import override

from dbt_semantic_interfaces.implementations.base import HashableBaseModel
from dbt_semantic_interfaces.protocols import ProtocolHint
from dbt_semantic_interfaces.protocols.export import Export, ExportConfig
from dbt_semantic_interfaces.type_enums.export_destination_type import (
ExportDestinationType,
)


class PydanticExportConfig(HashableBaseModel, ProtocolHint[ExportConfig]):
    """Pydantic implementation of ExportConfig.

    Note on `schema_name`: `schema` is a BaseModel attribute so we need to alias it here.
    Use `schema` for YAML definition & JSON, `schema_name` for object attribute.

    Only `export_as` is required; `schema_name` and `alias` are None when omitted.
    """

    @override
    def _implements_protocol(self) -> ExportConfig:
        # Returning `self` under the protocol's type lets a type checker verify conformance.
        return self

    export_as: ExportDestinationType
    # An explicit `default=None` keeps this field optional regardless of how the installed
    # pydantic version treats a `Field(...)` declared without a default, matching `alias` below.
    # NOTE(review): the validation alias is "schema_name" while the docstring says YAML/JSON
    # input uses `schema` -- confirm that input keyed by `schema` actually populates this field.
    schema_name: Optional[str] = Field(
        default=None,
        serialization_alias="schema",
        validation_alias="schema_name",
    )
    alias: Optional[str] = None


class PydanticExport(HashableBaseModel, ProtocolHint[Export]):
    """Pydantic model satisfying the Export protocol: a name plus its nested config."""

    name: str
    config: PydanticExportConfig

    @override
    def _implements_protocol(self) -> Export:
        # Hands back this instance typed as the protocol it is meant to satisfy.
        return self
2 changes: 2 additions & 0 deletions dbt_semantic_interfaces/implementations/saved_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
HashableBaseModel,
ModelWithMetadataParsing,
)
from dbt_semantic_interfaces.implementations.export import PydanticExport
from dbt_semantic_interfaces.implementations.filters.where_filter import (
PydanticWhereFilterIntersection,
)
Expand All @@ -31,3 +32,4 @@ def _implements_protocol(self) -> SavedQuery:
description: Optional[str] = None
metadata: Optional[PydanticMetadata] = None
label: Optional[str] = None
exports: Optional[List[PydanticExport]] = None
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,47 @@
],
"type": "object"
},
"export_config_schema": {
"$id": "export_config_schema",
"additionalProperties": false,
"properties": {
"alias": {
"type": "string"
},
"export_as": {
"enum": [
"TABLE",
"VIEW",
"table",
"view"
]
},
"schema": {
"type": "string"
}
},
"required": [
"export_as"
],
"type": "object"
},
"export_schema": {
"$id": "export_schema",
"additionalProperties": false,
"properties": {
"config": {
"$ref": "#/definitions/export_config_schema"
},
"name": {
"type": "string"
}
},
"required": [
"name",
"config"
],
"type": "object"
},
"filter_schema": {
"$id": "filter_schema",
"oneOf": [
Expand Down Expand Up @@ -430,6 +471,12 @@
"description": {
"type": "string"
},
"exports": {
"items": {
"$ref": "#/definitions/export_schema"
},
"type": "array"
},
"group_bys": {
"items": {
"type": "string"
Expand Down
30 changes: 30 additions & 0 deletions dbt_semantic_interfaces/parsing/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@

time_dimension_type_values = ["TIME", "time"]

# Accepted spellings for `export_as`: the upper-case names followed by their lower-case forms.
export_destination_type_values = ["TABLE", "VIEW"]
export_destination_type_values = export_destination_type_values + [
    name.lower() for name in export_destination_type_values
]


filter_schema = {
"$id": "filter_schema",
"oneOf": [
Expand Down Expand Up @@ -288,6 +292,29 @@
"required": ["time_spine_table_configurations"],
}

# JSON schema for the `config` mapping nested under each export entry.
export_config_schema = {
    "$id": "export_config_schema",
    "type": "object",
    "properties": {
        # Destination type, restricted to the accepted spellings listed above.
        "export_as": {"enum": export_destination_type_values},
        "schema": {"type": "string"},
        "alias": {"type": "string"},
    },
    # Only the destination type must be given; `schema` and `alias` may be omitted.
    "required": ["export_as"],
    # Unknown keys are rejected.
    "additionalProperties": False,
}


# JSON schema for one export entry: a name plus its nested config mapping.
export_schema = {
    "$id": "export_schema",
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        # Refers to the config schema by its `$id`.
        "config": {"$ref": "export_config_schema"},
    },
    # Both keys are mandatory and no other keys are accepted.
    "required": ["name", "config"],
    "additionalProperties": False,
}

saved_query_schema = {
"$id": "saved_query_schema",
Expand All @@ -305,6 +332,7 @@
},
"where": {"$ref": "filter_schema"},
"label": {"type": "string"},
"exports": {"type": "array", "items": {"$ref": "export_schema"}},
},
"required": ["name", "metrics"],
"additionalProperties": False,
Expand Down Expand Up @@ -355,6 +383,8 @@
node_relation_schema["$id"]: node_relation_schema,
semantic_model_defaults_schema["$id"]: semantic_model_defaults_schema,
time_spine_table_configuration_schema["$id"]: time_spine_table_configuration_schema,
export_schema["$id"]: export_schema,
export_config_schema["$id"]: export_config_schema,
}

resources: List[Tuple[str, Resource]] = [(str(k), DRAFT7.create_resource(v)) for k, v in schema_store.items()]
Expand Down
44 changes: 44 additions & 0 deletions dbt_semantic_interfaces/protocols/export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import annotations

from abc import abstractmethod
from typing import Optional, Protocol

from dbt_semantic_interfaces.type_enums.export_destination_type import (
ExportDestinationType,
)


class Export(Protocol):
    """Protocol for an export: configuration for writing query results to a table."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Name of this export."""
        ...

    @property
    @abstractmethod
    def config(self) -> ExportConfig:
        """Nested configuration attributes for this export."""
        ...


class ExportConfig(Protocol):
"""Nested configuration attributes for exports."""

@property
@abstractmethod
def export_as(self) -> ExportDestinationType:
    """Destination type that the export is written to."""

@property
@abstractmethod
def schema_name(self) -> Optional[str]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@QMalcolm the core parser can take schema from yaml and convert it to schema_name just as we do with Pydantic here, correct? I'm assuming that's the case.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is indeed the case. We have a separate unparsed and parsed node representation. It'll be schema in the unparsed and schema_name in the parsed

"""Schema to write export to. Defaults to deployment schema."""
pass

@property
@abstractmethod
def alias(self) -> Optional[str]:
    """Name for the table/file the export is written to. Defaults to the export name."""
7 changes: 7 additions & 0 deletions dbt_semantic_interfaces/protocols/saved_query.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from abc import abstractmethod
from typing import Optional, Protocol, Sequence

from dbt_semantic_interfaces.protocols.export import Export
from dbt_semantic_interfaces.protocols.metadata import Metadata
from dbt_semantic_interfaces.protocols.where_filter import WhereFilterIntersection

Expand Down Expand Up @@ -44,3 +45,9 @@ def where(self) -> Optional[WhereFilterIntersection]:
def label(self) -> Optional[str]:
"""Returns a string representing a human readable label for the saved query."""
pass

@property
@abstractmethod
def exports(self) -> Optional[Sequence[Export]]:
"""Exports that can run using this saved query."""
pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from dbt_semantic_interfaces.enum_extension import ExtendedEnum


class ExportDestinationType(ExtendedEnum):
    """Destination types that an export can be written to."""

    # Member values are the lower-case spellings.
    TABLE = "table"
    VIEW = "view"
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,9 @@ saved_query:
- Dimension('listing__capacity_latest')
where:
- "{{ Dimension('listing__capacity_latest') }} > 3"
exports:
- name: bookings
config:
export_as: table
schema: exports_schema
alias: bookings_export_table
43 changes: 43 additions & 0 deletions tests/parsing/test_saved_query_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
parse_yaml_files_to_semantic_manifest,
)
from dbt_semantic_interfaces.parsing.objects import YamlConfigFile
from dbt_semantic_interfaces.type_enums.export_destination_type import (
ExportDestinationType,
)
from tests.example_project_configuration import (
EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE,
)
Expand Down Expand Up @@ -134,3 +137,43 @@ def test_saved_query_where() -> None:
assert saved_query.where is not None
assert len(saved_query.where.where_filters) == 1
assert where == saved_query.where.where_filters[0].where_sql_template


def test_saved_query_exports() -> None:
    """Parse a saved query that declares two exports and check each export's config."""
    # NOTE(review): the YAML below is reproduced exactly as it appears in this copy; its
    # nesting indentation appears to have been lost -- confirm against the repository.
    yaml_contents = textwrap.dedent(
        """\
saved_query:
name: test_exports
metrics:
- test_metric_a
exports:
- name: test_exports1
config:
export_as: VIEW
schema: my_schema
alias: my_view_name
- name: test_exports2
config:
export_as: table
"""
    )
    config_file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents)

    parse_result = parse_yaml_files_to_semantic_manifest(
        files=[config_file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]
    )
    manifest = parse_result.semantic_manifest

    # Exactly one saved query, carrying both exports.
    assert len(manifest.saved_queries) == 1
    saved_query = manifest.saved_queries[0]
    assert saved_query.exports and len(saved_query.exports) == 2
    exports_by_name = {export.name: export for export in saved_query.exports}
    assert set(exports_by_name) == {"test_exports1", "test_exports2"}

    # First export sets every config attribute.
    full_config = exports_by_name["test_exports1"].config
    assert full_config.export_as == ExportDestinationType.VIEW
    assert full_config.schema_name == "my_schema"
    assert full_config.alias == "my_view_name"

    # Second export only sets the destination type; the optional attributes are None.
    minimal_config = exports_by_name["test_exports2"].config
    assert minimal_config.export_as == ExportDestinationType.TABLE
    assert minimal_config.schema_name is None
    assert minimal_config.alias is None
Comment on lines +178 to +179
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question for the folks who designed the spec - do we want to allow this? It seems like we can't actually fulfill this contract because there's no way to tell a warehouse "here, put this data..... someplace."

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note - if this will be caught by a validation PR I haven't read yet, just tell me to go away. :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tlento we'll default to the deployment schema when we actually implement the logic for this! #189

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about the table name? Or is the table name the export name? Is that obvious to people?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep it will default to the export name. I have no idea if that's obvious to people but it should be in the docs when we get to that

2 changes: 2 additions & 0 deletions tests/test_implements_satisfy_protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
PydanticMeasureAggregationParameters,
PydanticNonAdditiveDimensionParameters,
)
from dbt_semantic_interfaces.implementations.export import PydanticExport
from dbt_semantic_interfaces.implementations.filters.where_filter import (
PydanticWhereFilter,
)
Expand Down Expand Up @@ -124,6 +125,7 @@
description=OPTIONAL_STR_STRATEGY,
metadata=OPTIONAL_METADATA_STRATEGY,
label=OPTIONAL_STR_STRATEGY,
exports=from_type(List[PydanticExport]),
)


Expand Down
Loading