From 77a76a295571795f75165804681d1ded12738d30 Mon Sep 17 00:00:00 2001 From: jochen Date: Thu, 19 Dec 2024 14:41:28 +0100 Subject: [PATCH 1/5] Fixing the data quality name for model-level and field-level quality tests --- CHANGELOG.md | 1 + datacontract/breaking/breaking.py | 6 +- .../engines/soda/check_soda_execute.py | 6 +- datacontract/export/dbt_converter.py | 8 +- datacontract/export/exporter.py | 1 - datacontract/export/odcs_v3_exporter.py | 72 +++++++----- datacontract/export/sodacl_converter.py | 5 +- datacontract/imports/odcs_v3_importer.py | 5 + datacontract/lint/resolve.py | 8 +- .../model/data_contract_specification.py | 5 +- pyproject.toml | 2 +- tests/fixtures/export/datacontract.yaml | 9 +- .../odcs_v3/full-example.datacontract.yml | 10 ++ tests/fixtures/postgres/datacontract.yaml | 2 +- .../postgres/datacontract_case_sensitive.yaml | 2 +- tests/fixtures/postgres/odcs.yaml | 2 +- tests/fixtures/quality/datacontract.yaml | 5 +- tests/test_export_odcs_v3.py | 7 ++ tests/test_export_sodacl.py | 103 +++++++++--------- tests/test_test_postgres.py | 2 +- tests/test_test_quality.py | 6 +- 21 files changed, 153 insertions(+), 114 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93823122..2f1106b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - SodaCL: Prevent `KeyError: 'fail'` from happening when testing with SodaCL - fix: populate database and schema values for bigquery in exported dbt sources (#543) - Fixing the options for importing and exporting to standard output (#544) +- Fixing the data quality name for model-level and field-level quality tests ## [0.10.15] - 2024-10-26 diff --git a/datacontract/breaking/breaking.py b/datacontract/breaking/breaking.py index 3dffc365..5ee5d1d0 100644 --- a/datacontract/breaking/breaking.py +++ b/datacontract/breaking/breaking.py @@ -1,6 +1,6 @@ from datacontract.breaking.breaking_rules import BreakingRules from datacontract.model.breaking_change import BreakingChange, Location, Severity -from datacontract.model.data_contract_specification import Contact, Field, Info, Model, Quality, Terms +from datacontract.model.data_contract_specification import Contact, DeprecatedQuality, Field, Info, Model, Terms def info_breaking_changes( @@ -216,8 +216,8 @@ def terms_breaking_changes( def quality_breaking_changes( - old_quality: Quality, - new_quality: Quality, + old_quality: DeprecatedQuality, + new_quality: DeprecatedQuality, new_path: str, include_severities: [Severity], ) -> list[BreakingChange]: diff --git a/datacontract/engines/soda/check_soda_execute.py b/datacontract/engines/soda/check_soda_execute.py index 41de8757..8fde2a6c 100644 --- a/datacontract/engines/soda/check_soda_execute.py +++ b/datacontract/engines/soda/check_soda_execute.py @@ -12,7 +12,7 @@ from datacontract.engines.soda.connections.trino import to_trino_soda_configuration from datacontract.export.sodacl_converter import to_sodacl_yaml from datacontract.model.data_contract_specification import DataContractSpecification, Server -from datacontract.model.run import Check, Log, Run +from datacontract.model.run import Check, Log, ResultEnum, Run def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir): @@ -33,7 +33,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve Check( type="general", name="Check that format is supported", - result="warning", + result=ResultEnum.warning, reason=f"Format 
{server.format} not yet supported by datacontract CLI", engine="datacontract", ) @@ -93,7 +93,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve Check( type="general", name="Check that server type is supported", - result="warning", + result=ResultEnum.warning, reason=f"Server type {server.type} not yet supported by datacontract CLI", engine="datacontract-cli", ) diff --git a/datacontract/export/dbt_converter.py b/datacontract/export/dbt_converter.py index 027e72a6..9e97c0d3 100644 --- a/datacontract/export/dbt_converter.py +++ b/datacontract/export/dbt_converter.py @@ -177,9 +177,7 @@ def _to_column(field_name: str, field: Field, supports_constraints: bool, adapte length_test["min_value"] = field.minLength if field.maxLength is not None: length_test["max_value"] = field.maxLength - column["data_tests"].append( - {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test} - ) + column["data_tests"].append({"dbt_expectations.expect_column_value_lengths_to_be_between": length_test}) if field.pii is not None: column.setdefault("meta", {})["pii"] = field.pii if field.classification is not None: @@ -188,9 +186,7 @@ def _to_column(field_name: str, field: Field, supports_constraints: bool, adapte column.setdefault("tags", []).extend(field.tags) if field.pattern is not None: # Beware, the data contract pattern is a regex, not a like pattern - column["data_tests"].append( - {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}} - ) + column["data_tests"].append({"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}}) if ( field.minimum is not None or field.maximum is not None diff --git a/datacontract/export/exporter.py b/datacontract/export/exporter.py index ab3cefd0..0015deec 100644 --- a/datacontract/export/exporter.py +++ b/datacontract/export/exporter.py @@ -42,7 +42,6 @@ class ExportFormat(str, Enum): dcs = "dcs" iceberg = "iceberg" - @classmethod def get_supported_formats(cls): return list(map(lambda c: c.value, cls)) diff --git a/datacontract/export/odcs_v3_exporter.py b/datacontract/export/odcs_v3_exporter.py index 37c69d57..2a325b09 100644 --- a/datacontract/export/odcs_v3_exporter.py +++ b/datacontract/export/odcs_v3_exporter.py @@ -148,6 +148,10 @@ def to_odcs_schema(model_key, model_value: Model) -> dict: if properties: odcs_table["properties"] = properties + model_quality = to_odcs_quality_list(model_value.quality) + if len(model_quality) > 0: + odcs_table["quality"] = model_quality + odcs_table["customProperties"] = [] if model_value.model_extra is not None: for key, value in model_value.model_extra.items(): @@ -257,38 +261,48 @@ def to_property(field_name: str, field: Field) -> dict: del property["logicalTypeOptions"] if field.quality is not None: - quality_property = [] - for quality in field.quality: - quality_dict = {"type": quality.type} - if quality.description is not None: - quality_dict["description"] = quality.description - if quality.query is not None: - quality_dict["query"] = quality.query - # dialect is not supported in v3.0.0 - if quality.mustBe is not None: - quality_dict["mustBe"] = quality.mustBe - if quality.mustNotBe is not None: - quality_dict["mustNotBe"] = quality.mustNotBe - if quality.mustBeGreaterThan is not None: - quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan - if quality.mustBeGreaterThanOrEqualTo is not None: - quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo - if quality.mustBeLessThan is not None: - 
quality_dict["mustBeLessThan"] = quality.mustBeLessThan - if quality.mustBeLessThanOrEqualTo is not None: - quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo - if quality.mustBeBetween is not None: - quality_dict["mustBeBetween"] = quality.mustBeBetween - if quality.mustNotBeBetween is not None: - quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween - if quality.engine is not None: - quality_dict["engine"] = quality.engine - if quality.implementation is not None: - quality_dict["implementation"] = quality.implementation - quality_property.append(quality_dict) + quality_list = field.quality + quality_property = to_odcs_quality_list(quality_list) if len(quality_property) > 0: property["quality"] = quality_property # todo enum return property + + +def to_odcs_quality_list(quality_list): + quality_property = [] + for quality in quality_list: + quality_property.append(to_odcs_quality(quality)) + return quality_property + + +def to_odcs_quality(quality): + quality_dict = {"type": quality.type} + if quality.description is not None: + quality_dict["description"] = quality.description + if quality.query is not None: + quality_dict["query"] = quality.query + # dialect is not supported in v3.0.0 + if quality.mustBe is not None: + quality_dict["mustBe"] = quality.mustBe + if quality.mustNotBe is not None: + quality_dict["mustNotBe"] = quality.mustNotBe + if quality.mustBeGreaterThan is not None: + quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan + if quality.mustBeGreaterThanOrEqualTo is not None: + quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo + if quality.mustBeLessThan is not None: + quality_dict["mustBeLessThan"] = quality.mustBeLessThan + if quality.mustBeLessThanOrEqualTo is not None: + quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo + if quality.mustBeBetween is not None: + quality_dict["mustBeBetween"] = quality.mustBeBetween + if quality.mustNotBeBetween is not None: + quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween + if quality.engine is not None: + quality_dict["engine"] = quality.engine + if quality.implementation is not None: + quality_dict["implementation"] = quality.implementation + return quality_dict diff --git a/datacontract/export/sodacl_converter.py b/datacontract/export/sodacl_converter.py index f97c9615..b64b163f 100644 --- a/datacontract/export/sodacl_converter.py +++ b/datacontract/export/sodacl_converter.py @@ -200,9 +200,9 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]): for quality in quality_list: if quality.type == "sql": if field_name is None: - metric_name = f"{model_name}_{field_name}_quality_sql_{count}" - else: metric_name = f"{model_name}_quality_sql_{count}" + else: + metric_name = f"{model_name}_{field_name}_quality_sql_{count}" threshold = to_sodacl_threshold(quality) query = prepare_query(quality, model_name, field_name) if query is None: @@ -265,6 +265,7 @@ def to_sodacl_threshold(quality: Quality) -> str | None: return None +# These are deprecated root-level quality specifications, use the model-level and field-level quality fields instead def add_quality_checks(sodacl, data_contract_spec): if data_contract_spec.quality is None: return diff --git a/datacontract/imports/odcs_v3_importer.py b/datacontract/imports/odcs_v3_importer.py index 49aac259..7fcb8b8a 100644 --- a/datacontract/imports/odcs_v3_importer.py +++ b/datacontract/imports/odcs_v3_importer.py @@ -14,6 +14,7 @@ Field, Info, Model, + Quality, Retention, 
Server, ServiceLevel, @@ -193,6 +194,10 @@ def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]: model.fields = import_fields( odcs_schema.get("properties"), custom_type_mappings, server_type=get_server_type(odcs_contract) ) + if odcs_schema.get("quality") is not None: + # convert dict to pydantic model + + model.quality = [Quality.model_validate(q) for q in odcs_schema.get("quality")] model.title = schema_name if odcs_schema.get("dataGranularityDescription") is not None: model.config = {"dataGranularityDescription": odcs_schema.get("dataGranularityDescription")} diff --git a/datacontract/lint/resolve.py b/datacontract/lint/resolve.py index d3e77fe3..8947ff1b 100644 --- a/datacontract/lint/resolve.py +++ b/datacontract/lint/resolve.py @@ -9,7 +9,11 @@ from datacontract.lint.resources import read_resource from datacontract.lint.schema import fetch_schema from datacontract.lint.urls import fetch_resource -from datacontract.model.data_contract_specification import DataContractSpecification, Definition, Quality +from datacontract.model.data_contract_specification import ( + DataContractSpecification, + Definition, + DeprecatedQuality, +) from datacontract.model.exceptions import DataContractException from datacontract.model.odcs import is_open_data_contract_standard @@ -156,7 +160,7 @@ def _fetch_file(path) -> str: return file.read() -def _resolve_quality_ref(quality: Quality): +def _resolve_quality_ref(quality: DeprecatedQuality): """ Return the content of a ref file path @param quality data contract quality specification diff --git a/datacontract/model/data_contract_specification.py b/datacontract/model/data_contract_specification.py index afeb3228..2af37781 100644 --- a/datacontract/model/data_contract_specification.py +++ b/datacontract/model/data_contract_specification.py @@ -214,7 +214,8 @@ class Example(pyd.BaseModel): data: str | object = None -class Quality(pyd.BaseModel): +# Deprecated Quality class +class DeprecatedQuality(pyd.BaseModel): type: str = None specification: str | object = None @@ -287,7 +288,7 @@ class DataContractSpecification(pyd.BaseModel): default_factory=list, deprecated="Removed in Data Contract Specification " "v1.1.0. Use models.examples instead.", ) - quality: Quality = pyd.Field( + quality: DeprecatedQuality = pyd.Field( default=None, deprecated="Removed in Data Contract Specification v1.1.0. 
Use " "model-level and field-level quality instead.", ) diff --git a/pyproject.toml b/pyproject.toml index 9c7f3e2b..f1e0e4ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -131,7 +131,7 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] #addopts = "-n 8" # run tests in parallel, you can disable parallel test execution with "pytest -n0" command log_level = "INFO" -#log_cli = "true" # activate live logging, do not use with -n 8 xdist option for parallel test execution: https://github.com/pytest-dev/pytest-xdist/issues/402 +log_cli = "true" # activate live logging, do not use with -n 8 xdist option for parallel test execution: https://github.com/pytest-dev/pytest-xdist/issues/402 log_cli_level = "INFO" [tool.ruff] diff --git a/tests/fixtures/export/datacontract.yaml b/tests/fixtures/export/datacontract.yaml index 92b6a61a..d8423f37 100644 --- a/tests/fixtures/export/datacontract.yaml +++ b/tests/fixtures/export/datacontract.yaml @@ -65,4 +65,11 @@ models: enum: - pending - shipped - - delivered \ No newline at end of file + - delivered + quality: + - type: sql + description: Row Count + query: | + SELECT COUNT(*) AS row_count + FROM orders + mustBeGreaterThan: 1000 \ No newline at end of file diff --git a/tests/fixtures/odcs_v3/full-example.datacontract.yml b/tests/fixtures/odcs_v3/full-example.datacontract.yml index b4ad5987..3461b0f4 100644 --- a/tests/fixtures/odcs_v3/full-example.datacontract.yml +++ b/tests/fixtures/odcs_v3/full-example.datacontract.yml @@ -81,6 +81,16 @@ models: partitionKeyPosition: -1 criticalDataElement: false encryptedName: rcvr_cntry_code_encrypted + quality: + - type: library + description: Ensure row count is within expected volume range + rule: countCheck + dimension: completeness + method: reconciliation + severity: error + businessImpact: operational + schedule: 0 20 * * * + scheduler: cron config: dataGranularityDescription: Aggregation on columns txn_ref_dt, pmt_txn_id servicelevels: diff --git a/tests/fixtures/postgres/datacontract.yaml b/tests/fixtures/postgres/datacontract.yaml index 4f145b19..1333b24f 100644 --- a/tests/fixtures/postgres/datacontract.yaml +++ b/tests/fixtures/postgres/datacontract.yaml @@ -8,7 +8,7 @@ servers: my-dataproduct/postgres: type: postgres host: localhost - port: __PORT__ + port: 5432 database: test schema: public models: diff --git a/tests/fixtures/postgres/datacontract_case_sensitive.yaml b/tests/fixtures/postgres/datacontract_case_sensitive.yaml index f5298c89..cf4a26fe 100644 --- a/tests/fixtures/postgres/datacontract_case_sensitive.yaml +++ b/tests/fixtures/postgres/datacontract_case_sensitive.yaml @@ -8,7 +8,7 @@ servers: my-dataproduct/postgres: type: postgres host: localhost - port: __PORT__ + port: 5432 database: test schema: public models: diff --git a/tests/fixtures/postgres/odcs.yaml b/tests/fixtures/postgres/odcs.yaml index 16da2fcd..3b20f356 100644 --- a/tests/fixtures/postgres/odcs.yaml +++ b/tests/fixtures/postgres/odcs.yaml @@ -48,4 +48,4 @@ servers: database: test schema: public host: localhost - port: __PORT__ \ No newline at end of file + port: 5432 \ No newline at end of file diff --git a/tests/fixtures/quality/datacontract.yaml b/tests/fixtures/quality/datacontract.yaml index d56cb688..fc223000 100644 --- a/tests/fixtures/quality/datacontract.yaml +++ b/tests/fixtures/quality/datacontract.yaml @@ -8,7 +8,7 @@ servers: my-dataproduct/postgres: type: postgres host: localhost - port: __PORT__ + port: 5432 database: test schema: public models: @@ -28,8 +28,7 @@ models: - type: sql 
description: 95% of all order total values are expected to be between 10 and 499 EUR. dialect: postgres - query: | - SELECT percentile_cont(0.95) WITHIN GROUP (ORDER BY field_two) AS percentile_95 FROM my_table + query: SELECT percentile_cont(0.95) WITHIN GROUP (ORDER BY field_two) AS percentile_95 FROM my_table mustBeBetween: [ 1000, 49900 ] field_three: type: timestamp diff --git a/tests/test_export_odcs_v3.py b/tests/test_export_odcs_v3.py index 905332b7..6cba4b30 100644 --- a/tests/test_export_odcs_v3.py +++ b/tests/test_export_odcs_v3.py @@ -77,6 +77,13 @@ def test_to_odcs(): logicalType: string physicalType: text isNullable: false + quality: + - type: sql + description: Row Count + query: | + SELECT COUNT(*) AS row_count + FROM orders + mustBeGreaterThan: 1000 servers: - server: production diff --git a/tests/test_export_sodacl.py b/tests/test_export_sodacl.py index 4cb747c4..76d5528b 100644 --- a/tests/test_export_sodacl.py +++ b/tests/test_export_sodacl.py @@ -1,62 +1,10 @@ -import pytest import yaml from datacontract.export.sodacl_converter import to_sodacl_yaml from datacontract.model.data_contract_specification import DataContractSpecification -@pytest.fixture -def check_expected() -> str: - expected = """ - checks for orders: - - schema: - name: Check that field order_id is present - fail: - when required column missing: - - order_id - - schema: - name: Check that field order_id has type string - fail: - when wrong column type: - order_id: string - - missing_count(order_id) = 0: - name: Check that required field order_id has no null values - - schema: - name: Check that field processed_timestamp is present - fail: - when required column missing: - - processed_timestamp - - schema: - name: Check that field processed_timestamp has type timestamp - fail: - when wrong column type: - processed_timestamp: timestamp - - missing_count(processed_timestamp) = 0: - name: Check that required field processed_timestamp has no null values - - schema: - name: Check that field order_total is present - fail: - when required column missing: - - order_total - - schema: - name: Check that field order_total has type integer - fail: - when wrong column type: - order_total: integer - - orders_quality_sql_0 between 1000 and 49900: - orders_quality_sql_0 query: | - SELECT quantile_cont(order_total, 0.95) AS percentile_95 - FROM orders - - freshness(processed_timestamp) < 1d - - row_count > 10 - checks for line_items: - - row_count > 10: - name: Have at lease 10 line items - """ - return expected - - -def test_export_sodacl(check_expected: str): +def test_export_sodacl(): data_contract_specification_str = """ dataContractSpecification: 1.1.0 models: @@ -88,9 +36,56 @@ def test_export_sodacl(check_expected: str): name: Have at lease 10 line items """ + expected = """ + checks for orders: + - schema: + name: Check that field order_id is present + fail: + when required column missing: + - order_id + - schema: + name: Check that field order_id has type string + fail: + when wrong column type: + order_id: string + - missing_count(order_id) = 0: + name: Check that required field order_id has no null values + - schema: + name: Check that field processed_timestamp is present + fail: + when required column missing: + - processed_timestamp + - schema: + name: Check that field processed_timestamp has type timestamp + fail: + when wrong column type: + processed_timestamp: timestamp + - missing_count(processed_timestamp) = 0: + name: Check that required field processed_timestamp has no null values + - schema: + name: 
Check that field order_total is present + fail: + when required column missing: + - order_total + - schema: + name: Check that field order_total has type integer + fail: + when wrong column type: + order_total: integer + - orders_order_total_quality_sql_0 between 1000 and 49900: + orders_order_total_quality_sql_0 query: | + SELECT quantile_cont(order_total, 0.95) AS percentile_95 + FROM orders + - freshness(processed_timestamp) < 1d + - row_count > 10 + checks for line_items: + - row_count > 10: + name: Have at lease 10 line items + """ + data = yaml.safe_load(data_contract_specification_str) data_contract_specification = DataContractSpecification(**data) result = to_sodacl_yaml(data_contract_specification) - assert yaml.safe_load(result) == yaml.safe_load(check_expected) + assert yaml.safe_load(result) == yaml.safe_load(expected) diff --git a/tests/test_test_postgres.py b/tests/test_test_postgres.py index 854c86f1..73dbb852 100644 --- a/tests/test_test_postgres.py +++ b/tests/test_test_postgres.py @@ -72,7 +72,7 @@ def _setup_datacontract(file): with open(file) as data_contract_file: data_contract_str = data_contract_file.read() port = postgres.get_exposed_port(5432) - data_contract_str = data_contract_str.replace("__PORT__", port) + data_contract_str = data_contract_str.replace("5432", port) return data_contract_str diff --git a/tests/test_test_quality.py b/tests/test_test_quality.py index c6ec0e13..9c7b5e40 100644 --- a/tests/test_test_quality.py +++ b/tests/test_test_quality.py @@ -48,17 +48,17 @@ def test_test_quality_invalid(postgres_container, monkeypatch): print(run.pretty()) assert run.result == "failed" assert any( - check.name == "my_table_quality_sql_0 between 1000 and 49900" and check.result == "failed" + check.name == "my_table_field_two_quality_sql_0 between 1000 and 49900" and check.result == "failed" for check in run.checks ) - assert any(check.name == "my_table_None_quality_sql_0 < 3600" and check.result == "failed" for check in run.checks) + assert any(check.name == "my_table_quality_sql_0 < 3600" and check.result == "failed" for check in run.checks) def _setup_datacontract(file): with open(file) as data_contract_file: data_contract_str = data_contract_file.read() port = postgres.get_exposed_port(5432) - data_contract_str = data_contract_str.replace("__PORT__", port) + data_contract_str = data_contract_str.replace("5432", port) return data_contract_str From 65268e6ee13c5acd2a10189bc42db2f2699cf14f Mon Sep 17 00:00:00 2001 From: jochen Date: Thu, 19 Dec 2024 15:01:00 +0100 Subject: [PATCH 2/5] Fix tests --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f1e0e4ab..9c7f3e2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -131,7 +131,7 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] #addopts = "-n 8" # run tests in parallel, you can disable parallel test execution with "pytest -n0" command log_level = "INFO" -log_cli = "true" # activate live logging, do not use with -n 8 xdist option for parallel test execution: https://github.com/pytest-dev/pytest-xdist/issues/402 +#log_cli = "true" # activate live logging, do not use with -n 8 xdist option for parallel test execution: https://github.com/pytest-dev/pytest-xdist/issues/402 log_cli_level = "INFO" [tool.ruff] From 119a40084c8455fe47139fc0bd89c3181cf179bb Mon Sep 17 00:00:00 2001 From: jochen Date: Thu, 19 Dec 2024 15:57:21 +0100 Subject: [PATCH 3/5] Prepare release --- CHANGELOG.md | 8 +++++++- pyproject.toml | 2 +- 2 files 
changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2f1106b6..29bbe1af 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+### Changed
+
+## [0.10.16] - 2024-12-19
+
 ### Added
 - Support for exporting a Data Contract to an Iceberg schema definition.
 - When importing in dbt format, add the dbt `not_null` information as a datacontract `required` field (#547)
@@ -24,7 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixing the options for importing and exporting to standard output (#544)
 - Fixing the data quality name for model-level and field-level quality tests
 
-## [0.10.15] - 2024-10-26
+## [0.10.15] - 2024-12-02
 
 ### Added
 - Support for model import from parquet file metadata.
diff --git a/pyproject.toml b/pyproject.toml
index 9c7f3e2b..b9e97dec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "datacontract-cli"
-version = "0.10.15"
+version = "0.10.16"
 description = "The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library."
 readme = "README.md"
 authors = [

From feec6a7696a01c6417ba948918fd8426c4157513 Mon Sep 17 00:00:00 2001
From: jochen
Date: Thu, 19 Dec 2024 16:35:35 +0100
Subject: [PATCH 4/5] Improve reason

---
 datacontract/engines/soda/check_soda_execute.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datacontract/engines/soda/check_soda_execute.py b/datacontract/engines/soda/check_soda_execute.py
index 8fde2a6c..8cacca83 100644
--- a/datacontract/engines/soda/check_soda_execute.py
+++ b/datacontract/engines/soda/check_soda_execute.py
@@ -183,4 +183,4 @@ def update_reason(check, c):
             # print(check.reason)
             break  # Exit the loop once the desired block is found
         if "fail" in c["diagnostics"]:
-            check.reason = f"Got: {c['diagnostics']['value']} Expected: {c['diagnostics']['fail']}"
+            check.reason = f"Value: {c['diagnostics']['value']} Fails: {c['diagnostics']['fail']}"

From a18288fc29ce5c1fae69dd4282cfcba98a278ce3 Mon Sep 17 00:00:00 2001
From: jochen
Date: Thu, 19 Dec 2024 22:03:27 +0100
Subject: [PATCH 5/5] Update dependencies

---
 pyproject.toml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b9e97dec..813229b0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,9 @@ avro = [
 ]
 
 bigquery = [
-    "soda-core-bigquery>=3.3.1,<3.5.0"
+    "soda-core-bigquery>=3.3.1,<3.5.0",
+    "google-cloud-bigquery-storage>=2.27.0",
+    "grpcio==1.62.3" # avoid strange log output in newer versions
 ]
 
 databricks = [