From 46f3309568a6ea5923b540c73bb3fb561536f674 Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Tue, 13 Feb 2024 14:29:33 +0100 Subject: [PATCH 01/11] Add "test --examples" --- datacontract/cli.py | 10 ++- datacontract/data_contract.py | 63 ++++++++++++++++++- .../fastjsonschema/check_jsonschema.py | 2 +- .../local-json-simple/datacontract.yaml | 39 ++++++++++++ tests/test_examples_local_json_example.py | 22 +++++++ 5 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 tests/examples/local-json-simple/datacontract.yaml create mode 100644 tests/test_examples_local_json_example.py diff --git a/datacontract/cli.py b/datacontract/cli.py index 2a7a4724..235425c9 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -87,13 +87,21 @@ def test( "Use the key of the server object in the data contract yaml file " "to refer to a server, e.g., `production`, or `all` for all " "servers (default).")] = "all", + examples: Annotated[bool, typer.Option( + help="Run the schema and quality tests on the example data within the data contract.")] = None, publish: Annotated[str, typer.Option( - help="")] = None, + help="The url to publish the results after the test")] = None, ): """ Run schema and quality tests on configured servers. """ print(f"Testing {location}") + if examples: + print(f"Using examples") + run = DataContract(data_contract_file=location).testExample() + _handle_result(run) + return + run = DataContract(data_contract_file=location, publish_url=publish).test() _handle_result(run) diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index a1744413..64809cc5 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -1,5 +1,7 @@ import json import logging +import tempfile +import yaml from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import \ check_that_datacontract_contains_valid_server_configuration @@ -13,7 +15,7 @@ from datacontract.lint import resolve from datacontract.lint.linters.example_model_linter import ExampleModelLinter from datacontract.model.data_contract_specification import \ - DataContractSpecification + DataContractSpecification, Server from datacontract.model.exceptions import DataContractException from datacontract.model.run import \ Run, Check @@ -125,6 +127,65 @@ def test(self) -> Run: return run + def testExample(self) -> Run: + run = Run.create_run() + try: + run.log_info(f"Testing data contract") + data_contract = resolve.resolve_data_contract(self._data_contract_file, self._data_contract_str, + self._data_contract) + + # TODO check yaml contains models + run.log_info(f"Running tests for data contract {data_contract.id} against examples") + run.dataContractId = data_contract.id + run.dataContractVersion = data_contract.info.version + run.server = "examples" + + with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir: + run.log_info(f"Writing example data to {tmp_dir} to be used as a local server") + path = f"{tmp_dir}" + "/{model}.json" + format = "json" + delimiter = "array" + for example in data_contract.examples: + format = example.type + p = f"{tmp_dir}/{example.model}.json" + run.log_info(f"Creating example file {p}") + with open(p, "w") as f: + f.write(json.dumps(example.data)) + + server = Server( + type="local", + path=path, + format=format, + delimiter=delimiter, + ) + print(server) + run.log_info(f"Using {server} for testing the examples") + + # 5. check server is supported type + # 6. check server credentials are complete + if server.format == "json": + check_jsonschema(run, data_contract, server) + check_soda_execute(run, data_contract, server, self._spark) + + except DataContractException as e: + run.checks.append(Check( + type=e.type, + result=e.result, + name=e.name, + reason=e.reason, + engine=e.engine, + details="" + )) + run.log_error(str(e)) + + + run.finish() + + if self._publish_url is not None: + publish_datamesh_manager(run, self._publish_url) + + return run + def diff(self, other): pass diff --git a/datacontract/engines/fastjsonschema/check_jsonschema.py b/datacontract/engines/fastjsonschema/check_jsonschema.py index 4a32f61d..d24f52f4 100644 --- a/datacontract/engines/fastjsonschema/check_jsonschema.py +++ b/datacontract/engines/fastjsonschema/check_jsonschema.py @@ -40,7 +40,7 @@ def read_json_lines_from_file(file): def read_json_array(file): - data = json.loads(file) + data = json.load(file) for item in data: yield item diff --git a/tests/examples/local-json-simple/datacontract.yaml b/tests/examples/local-json-simple/datacontract.yaml new file mode 100644 index 00000000..3563fd0f --- /dev/null +++ b/tests/examples/local-json-simple/datacontract.yaml @@ -0,0 +1,39 @@ +dataContractSpecification: 0.9.2 +id: "61111-0002" +info: + title: "Verbraucherpreisindex: Deutschland, Monate" + description: A data contract for the distribution and use of the German Consumer Price Index data. + version: 1.0.0 + owner: my-domain-team +models: + verbraucherpreisindex: + description: Model representing the Consumer Price Index for Germany + fields: + wert: + description: Value of the Consumer Price Index + type: integer + required: true + jahrMonat: + description: Year and month of the data + type: string + required: true + qualitaet: + description: Quality of the data + type: string + enum: + - "vorlaeufig" + - "endgueltig" + +examples: + - type: json + description: Example entry for CPI data + model: verbraucherpreisindex + data: + - wert: 99 + jahrMonat: "2022-00" + - wert: 100 + jahrMonat: "2022-01" + - wert: 101 + jahrMonat: "2022-02" + qualitaet: "vorlaeufig" + diff --git a/tests/test_examples_local_json_example.py b/tests/test_examples_local_json_example.py new file mode 100644 index 00000000..4c544773 --- /dev/null +++ b/tests/test_examples_local_json_example.py @@ -0,0 +1,22 @@ +import pytest +from typer.testing import CliRunner + +from datacontract.cli import app +from datacontract.data_contract import DataContract + +runner = CliRunner() + + +def test_cli(): + result = runner.invoke(app, ["test", "--examples", "./examples/local-json-simple/datacontract.yaml"]) + assert result.exit_code == 0 + + +def test_local_json(): + data_contract = DataContract(data_contract_file="examples/local-json-simple/datacontract.yaml") + run = data_contract.testExample() + print(run) + print(run.result) + assert run.result == "passed" + + From 022c775f625b3aa9980d593475ee5b06f37dff6e Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Tue, 13 Feb 2024 14:49:21 +0100 Subject: [PATCH 02/11] Add "test --examples" --- datacontract/data_contract.py | 22 +++++++++--- .../local-json-simple/datacontract_csv.yaml | 35 +++++++++++++++++++ ...act.yaml => datacontract_json_inline.yaml} | 0 tests/test_examples_local_json_example.py | 4 +-- tests/test_examples_local_json_example_csv.py | 22 ++++++++++++ 5 files changed, 77 insertions(+), 6 deletions(-) create mode 100644 tests/examples/local-json-simple/datacontract_csv.yaml rename tests/examples/local-json-simple/{datacontract.yaml => datacontract_json_inline.yaml} (100%) create mode 100644 tests/test_examples_local_json_example_csv.py diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index 64809cc5..403d7d0f 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -1,6 +1,8 @@ import json import logging import tempfile +from typing import List + import yaml from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import \ @@ -142,15 +144,27 @@ def testExample(self) -> Run: with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir: run.log_info(f"Writing example data to {tmp_dir} to be used as a local server") - path = f"{tmp_dir}" + "/{model}.json" format = "json" - delimiter = "array" + for example in data_contract.examples: format = example.type - p = f"{tmp_dir}/{example.model}.json" + p = f"{tmp_dir}/{example.model}.{format}" run.log_info(f"Creating example file {p}") with open(p, "w") as f: - f.write(json.dumps(example.data)) + content = "" + if format == "json" and example.data is List: + content = json.dumps(example.data) + elif format == "json" and example.data is str: + content = example.data + elif format == "yaml" and example.data is List: + content = yaml.dump(example.data) + elif format == "yaml" and example.data is str: + content = example.data + elif format == "csv": + content = example.data + f.write(content) + path = f"{tmp_dir}" + "/{model}." + format + delimiter = "array" server = Server( type="local", diff --git a/tests/examples/local-json-simple/datacontract_csv.yaml b/tests/examples/local-json-simple/datacontract_csv.yaml new file mode 100644 index 00000000..77569d37 --- /dev/null +++ b/tests/examples/local-json-simple/datacontract_csv.yaml @@ -0,0 +1,35 @@ +dataContractSpecification: 0.9.2 +id: "61111-0002" +info: + title: "Verbraucherpreisindex: Deutschland, Monate" + description: A data contract for the distribution and use of the German Consumer Price Index data. + version: 1.0.0 + owner: my-domain-team +models: + verbraucherpreisindex: + description: Model representing the Consumer Price Index for Germany + fields: + wert: + description: Value of the Consumer Price Index + type: bigint # integer + required: true + jahrMonat: + description: Year and month of the data + type: varchar # string + required: true + qualitaet: + description: Quality of the data + type: varchar # string + enum: + - "vorlaeufig" + - "endgueltig" +examples: + - type: csv + description: Example entry for CPI data + model: verbraucherpreisindex + data: |- + wert, jahrMonat, qualitaet + 99, "2022-00", + 100, "2022-01", + 101, "2022-02", "vorlaeufig" + diff --git a/tests/examples/local-json-simple/datacontract.yaml b/tests/examples/local-json-simple/datacontract_json_inline.yaml similarity index 100% rename from tests/examples/local-json-simple/datacontract.yaml rename to tests/examples/local-json-simple/datacontract_json_inline.yaml diff --git a/tests/test_examples_local_json_example.py b/tests/test_examples_local_json_example.py index 4c544773..e48c6857 100644 --- a/tests/test_examples_local_json_example.py +++ b/tests/test_examples_local_json_example.py @@ -8,12 +8,12 @@ def test_cli(): - result = runner.invoke(app, ["test", "--examples", "./examples/local-json-simple/datacontract.yaml"]) + result = runner.invoke(app, ["test", "--examples", "./examples/local-json-simple/datacontract_json_inline.yaml"]) assert result.exit_code == 0 def test_local_json(): - data_contract = DataContract(data_contract_file="examples/local-json-simple/datacontract.yaml") + data_contract = DataContract(data_contract_file="examples/local-json-simple/datacontract_json_inline.yaml") run = data_contract.testExample() print(run) print(run.result) diff --git a/tests/test_examples_local_json_example_csv.py b/tests/test_examples_local_json_example_csv.py new file mode 100644 index 00000000..ce626cc7 --- /dev/null +++ b/tests/test_examples_local_json_example_csv.py @@ -0,0 +1,22 @@ +import pytest +from typer.testing import CliRunner + +from datacontract.cli import app +from datacontract.data_contract import DataContract + +runner = CliRunner() + + +def test_cli(): + result = runner.invoke(app, ["test", "--examples", "./examples/local-json-simple/datacontract_csv.yaml"]) + assert result.exit_code == 0 + + +def test_local_json(): + data_contract = DataContract(data_contract_file="examples/local-json-simple/datacontract_csv.yaml") + run = data_contract.testExample() + print(run) + print(run.result) + assert run.result == "passed" + + From 3a682a9ead00869795f13e1201941e9f79adceaa Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Tue, 13 Feb 2024 19:36:49 +0100 Subject: [PATCH 03/11] Add "test --examples" --- .../engines/fastjsonschema/check_jsonschema.py | 10 +++++----- .../engines/fastjsonschema/s3/s3_read_files.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datacontract/engines/fastjsonschema/check_jsonschema.py b/datacontract/engines/fastjsonschema/check_jsonschema.py index d24f52f4..628f86ce 100644 --- a/datacontract/engines/fastjsonschema/check_jsonschema.py +++ b/datacontract/engines/fastjsonschema/check_jsonschema.py @@ -40,7 +40,7 @@ def read_json_lines_from_file(file): def read_json_array(file): - data = json.load(file) + data = json.loads(file) for item in data: yield item @@ -90,13 +90,13 @@ def process_s3_file(server, model_name, validate): s3_location = s3_location.format(model = model_name) json_stream = None - for file_content in yield_s3_files(s3_endpoint_url, s3_location): + for file in yield_s3_files(s3_endpoint_url, s3_location): if server.delimiter == "new_line": - json_stream = read_json_lines(file_content) + json_stream = read_json_lines(file) elif server.delimiter == "array": - json_stream = read_json_array(file_content) + json_stream = read_json_array(file) else: - json_stream = read_json_file(file_content) + json_stream = read_json_file(file) if json_stream is None: raise DataContractException( diff --git a/datacontract/engines/fastjsonschema/s3/s3_read_files.py b/datacontract/engines/fastjsonschema/s3/s3_read_files.py index 2172b1d3..814028b6 100644 --- a/datacontract/engines/fastjsonschema/s3/s3_read_files.py +++ b/datacontract/engines/fastjsonschema/s3/s3_read_files.py @@ -9,8 +9,8 @@ def yield_s3_files(s3_endpoint_url, s3_location): files = fs.glob(s3_location) for file in files: with fs.open(file) as f: - logging.info(f"Reading file {file}") - yield f.read() + logging.info(f"Opening file {file}") + yield f def s3_fs(s3_endpoint_url): From 9131a9e6ec44befdda5953ca4a86c41bafdfa1cf Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Tue, 13 Feb 2024 19:38:34 +0100 Subject: [PATCH 04/11] Add "test --examples" --- datacontract/engines/fastjsonschema/check_jsonschema.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datacontract/engines/fastjsonschema/check_jsonschema.py b/datacontract/engines/fastjsonschema/check_jsonschema.py index 628f86ce..7419742e 100644 --- a/datacontract/engines/fastjsonschema/check_jsonschema.py +++ b/datacontract/engines/fastjsonschema/check_jsonschema.py @@ -29,7 +29,8 @@ def validate_json_stream(model_name, validate, json_stream): ) -def read_json_lines(file_content: str): +def read_json_lines(file): + file_content = file.read() for line in file_content.splitlines(): yield json.loads(line) @@ -40,13 +41,13 @@ def read_json_lines_from_file(file): def read_json_array(file): - data = json.loads(file) + data = json.load(file) for item in data: yield item def read_json_file(file): - yield json.loads(file.read()) + yield json.load(file) def process_json_file(run, model_name, validate, file, delimiter): From a842045a25276dcecc3a2ea08c979f25e3c89f7c Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Tue, 13 Feb 2024 19:39:18 +0100 Subject: [PATCH 05/11] Add "test --examples" --- datacontract/engines/fastjsonschema/check_jsonschema.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/datacontract/engines/fastjsonschema/check_jsonschema.py b/datacontract/engines/fastjsonschema/check_jsonschema.py index 7419742e..ab95f3a4 100644 --- a/datacontract/engines/fastjsonschema/check_jsonschema.py +++ b/datacontract/engines/fastjsonschema/check_jsonschema.py @@ -35,11 +35,6 @@ def read_json_lines(file): yield json.loads(line) -def read_json_lines_from_file(file): - for line in file: - yield json.loads(line) - - def read_json_array(file): data = json.load(file) for item in data: @@ -52,7 +47,7 @@ def read_json_file(file): def process_json_file(run, model_name, validate, file, delimiter): if delimiter == "new_line": - json_stream = read_json_lines_from_file(file) + json_stream = read_json_lines(file) elif delimiter == "array": json_stream = read_json_array(file) else: From b1d64f38f483c79e00e0b541de71d496048d4f0d Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Tue, 13 Feb 2024 20:52:57 +0100 Subject: [PATCH 06/11] Add "test --examples" --- datacontract/data_contract.py | 13 +++++++------ .../engines/fastjsonschema/check_jsonschema.py | 1 + tests/test_examples_local_json_example.py | 3 +++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index 403d7d0f..c19a252a 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -137,13 +137,13 @@ def testExample(self) -> Run: self._data_contract) # TODO check yaml contains models - run.log_info(f"Running tests for data contract {data_contract.id} against examples") + run.log_info(f"Running tests for data contract {data_contract.id} ({data_contract.info.version}) against examples") run.dataContractId = data_contract.id run.dataContractVersion = data_contract.info.version run.server = "examples" with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir: - run.log_info(f"Writing example data to {tmp_dir} to be used as a local server") + run.log_info(f"Copying examples to files in temporary directory {tmp_dir}") format = "json" for example in data_contract.examples: @@ -152,16 +152,17 @@ def testExample(self) -> Run: run.log_info(f"Creating example file {p}") with open(p, "w") as f: content = "" - if format == "json" and example.data is List: + if format == "json" and type(example.data) is list: content = json.dumps(example.data) - elif format == "json" and example.data is str: + elif format == "json" and type(example.data) is str: content = example.data - elif format == "yaml" and example.data is List: + elif format == "yaml" and type(example.data) is list: content = yaml.dump(example.data) - elif format == "yaml" and example.data is str: + elif format == "yaml" and type(example.data) is str: content = example.data elif format == "csv": content = example.data + logging.debug(f"Content: {content}") f.write(content) path = f"{tmp_dir}" + "/{model}." + format delimiter = "array" diff --git a/datacontract/engines/fastjsonschema/check_jsonschema.py b/datacontract/engines/fastjsonschema/check_jsonschema.py index ab95f3a4..07ff664d 100644 --- a/datacontract/engines/fastjsonschema/check_jsonschema.py +++ b/datacontract/engines/fastjsonschema/check_jsonschema.py @@ -63,6 +63,7 @@ def process_local_file(run, server, model_name, validate): if os.path.isdir(path): return process_directory(run, path, server, model_name, validate) else: + logging.info(f"Processing file {path}") with open(path, 'r') as file: process_json_file(run, model_name, validate, file, server.delimiter) diff --git a/tests/test_examples_local_json_example.py b/tests/test_examples_local_json_example.py index e48c6857..cbd73d9b 100644 --- a/tests/test_examples_local_json_example.py +++ b/tests/test_examples_local_json_example.py @@ -1,3 +1,5 @@ +import logging + import pytest from typer.testing import CliRunner @@ -6,6 +8,7 @@ runner = CliRunner() +logging.basicConfig(level=logging.DEBUG, force=True) def test_cli(): result = runner.invoke(app, ["test", "--examples", "./examples/local-json-simple/datacontract_json_inline.yaml"]) From 0be064d987f74567b8c6017a98b929f7da9a7611 Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Tue, 13 Feb 2024 21:03:13 +0100 Subject: [PATCH 07/11] Add "test --examples" --- .../fastjsonschema/check_jsonschema.py | 25 ++++++++++++++----- .../fastjsonschema/s3/s3_read_files.py | 4 +-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/datacontract/engines/fastjsonschema/check_jsonschema.py b/datacontract/engines/fastjsonschema/check_jsonschema.py index 07ff664d..f663ee71 100644 --- a/datacontract/engines/fastjsonschema/check_jsonschema.py +++ b/datacontract/engines/fastjsonschema/check_jsonschema.py @@ -35,16 +35,29 @@ def read_json_lines(file): yield json.loads(line) +def read_json_lines_content(file_content): + for line in file_content.splitlines(): + yield json.loads(line) + + def read_json_array(file): data = json.load(file) for item in data: yield item +def read_json_array_content(file_content): + data = json.loads(file_content) + for item in data: + yield item + def read_json_file(file): yield json.load(file) +def read_json_file_content(file_content): + yield json.loads(file_content) + def process_json_file(run, model_name, validate, file, delimiter): if delimiter == "new_line": json_stream = read_json_lines(file) @@ -58,7 +71,7 @@ def process_json_file(run, model_name, validate, file, delimiter): def process_local_file(run, server, model_name, validate): path = server.path if "{model}" in path: - path = path.format(model = model_name) + path = path.format(model=model_name) if os.path.isdir(path): return process_directory(run, path, server, model_name, validate) @@ -84,16 +97,16 @@ def process_s3_file(server, model_name, validate): s3_endpoint_url = server.endpointUrl s3_location = server.location if "{model}" in s3_location: - s3_location = s3_location.format(model = model_name) + s3_location = s3_location.format(model=model_name) json_stream = None - for file in yield_s3_files(s3_endpoint_url, s3_location): + for file_content in yield_s3_files(s3_endpoint_url, s3_location): if server.delimiter == "new_line": - json_stream = read_json_lines(file) + json_stream = read_json_lines_content(file_content) elif server.delimiter == "array": - json_stream = read_json_array(file) + json_stream = read_json_array_content(file_content) else: - json_stream = read_json_file(file) + json_stream = read_json_file_content(file_content) if json_stream is None: raise DataContractException( diff --git a/datacontract/engines/fastjsonschema/s3/s3_read_files.py b/datacontract/engines/fastjsonschema/s3/s3_read_files.py index 814028b6..fb0ae4b2 100644 --- a/datacontract/engines/fastjsonschema/s3/s3_read_files.py +++ b/datacontract/engines/fastjsonschema/s3/s3_read_files.py @@ -9,8 +9,8 @@ def yield_s3_files(s3_endpoint_url, s3_location): files = fs.glob(s3_location) for file in files: with fs.open(file) as f: - logging.info(f"Opening file {file}") - yield f + logging.info(f"Downloading file {file}") + yield f.read() def s3_fs(s3_endpoint_url): From ac93083dd820806388cbba93dce5dffa8d9e7613 Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Tue, 13 Feb 2024 21:03:43 +0100 Subject: [PATCH 08/11] Add "test --examples" --- datacontract/engines/fastjsonschema/check_jsonschema.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datacontract/engines/fastjsonschema/check_jsonschema.py b/datacontract/engines/fastjsonschema/check_jsonschema.py index f663ee71..f85ea732 100644 --- a/datacontract/engines/fastjsonschema/check_jsonschema.py +++ b/datacontract/engines/fastjsonschema/check_jsonschema.py @@ -35,7 +35,7 @@ def read_json_lines(file): yield json.loads(line) -def read_json_lines_content(file_content): +def read_json_lines_content(file_content: str): for line in file_content.splitlines(): yield json.loads(line) @@ -45,7 +45,8 @@ def read_json_array(file): for item in data: yield item -def read_json_array_content(file_content): + +def read_json_array_content(file_content: str): data = json.loads(file_content) for item in data: yield item @@ -55,9 +56,10 @@ def read_json_file(file): yield json.load(file) -def read_json_file_content(file_content): +def read_json_file_content(file_content: str): yield json.loads(file_content) + def process_json_file(run, model_name, validate, file, delimiter): if delimiter == "new_line": json_stream = read_json_lines(file) From fb99eb306957308e0a3a1272594102d5249dbafa Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Wed, 14 Feb 2024 09:35:42 +0100 Subject: [PATCH 09/11] Add "test --examples" --- datacontract/cli.py | 8 +- datacontract/data_contract.py | 138 +++++++----------- .../datacontract_csv.yaml | 0 .../datacontract_inline.yaml} | 4 +- .../examples/examples/datacontract_json.yaml | 35 +++++ ...ample.py => test_examples_examples_csv.py} | 8 +- tests/test_examples_examples_inline.py | 25 ++++ ..._csv.py => test_examples_examples_json.py} | 11 +- 8 files changed, 128 insertions(+), 101 deletions(-) rename tests/examples/{local-json-simple => examples}/datacontract_csv.yaml (100%) rename tests/examples/{local-json-simple/datacontract_json_inline.yaml => examples/datacontract_inline.yaml} (92%) create mode 100644 tests/examples/examples/datacontract_json.yaml rename tests/{test_examples_local_json_example.py => test_examples_examples_csv.py} (64%) create mode 100644 tests/test_examples_examples_inline.py rename tests/{test_examples_local_json_example_csv.py => test_examples_examples_json.py} (56%) diff --git a/datacontract/cli.py b/datacontract/cli.py index 235425c9..b9bea568 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -96,13 +96,7 @@ def test( Run schema and quality tests on configured servers. """ print(f"Testing {location}") - if examples: - print(f"Using examples") - run = DataContract(data_contract_file=location).testExample() - _handle_result(run) - return - - run = DataContract(data_contract_file=location, publish_url=publish).test() + run = DataContract(data_contract_file=location, publish_url=publish, examples=examples).test() _handle_result(run) diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index c19a252a..a762b72c 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -30,6 +30,7 @@ def __init__( data_contract_str: str = None, data_contract: DataContractSpecification = None, server: str = None, + examples: bool = False, publish_url: str = None, spark: str = None, ): @@ -37,6 +38,7 @@ def __init__( self._data_contract_str = data_contract_str self._data_contract = data_contract self._server = server + self._examples = examples self._publish_url = publish_url self._spark = spark @@ -86,20 +88,27 @@ def test(self) -> Run: check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server) # TODO check yaml contains models - server_name = list(data_contract.servers.keys())[0] - server = data_contract.servers.get(server_name) - run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}") - run.dataContractId = data_contract.id - run.dataContractVersion = data_contract.info.version - run.dataProductId = server.dataProductId - run.outputPortId = server.outputPortId - run.server = server_name - # 5. check server is supported type - # 6. check server credentials are complete - if server.format == "json": - check_jsonschema(run, data_contract, server) - check_soda_execute(run, data_contract, server, self._spark) + with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir: + if self._examples: + server_name = "examples" + server = self._get_examples_server(data_contract, run, tmp_dir) + else: + server_name = list(data_contract.servers.keys())[0] + server = data_contract.servers.get(server_name) + + run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}") + run.dataContractId = data_contract.id + run.dataContractVersion = data_contract.info.version + run.dataProductId = server.dataProductId + run.outputPortId = server.outputPortId + run.server = server_name + + # 5. check server is supported type + # 6. check server credentials are complete + if server.format == "json": + check_jsonschema(run, data_contract, server) + check_soda_execute(run, data_contract, server, self._spark) except DataContractException as e: run.checks.append(Check( @@ -129,77 +138,6 @@ def test(self) -> Run: return run - def testExample(self) -> Run: - run = Run.create_run() - try: - run.log_info(f"Testing data contract") - data_contract = resolve.resolve_data_contract(self._data_contract_file, self._data_contract_str, - self._data_contract) - - # TODO check yaml contains models - run.log_info(f"Running tests for data contract {data_contract.id} ({data_contract.info.version}) against examples") - run.dataContractId = data_contract.id - run.dataContractVersion = data_contract.info.version - run.server = "examples" - - with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir: - run.log_info(f"Copying examples to files in temporary directory {tmp_dir}") - format = "json" - - for example in data_contract.examples: - format = example.type - p = f"{tmp_dir}/{example.model}.{format}" - run.log_info(f"Creating example file {p}") - with open(p, "w") as f: - content = "" - if format == "json" and type(example.data) is list: - content = json.dumps(example.data) - elif format == "json" and type(example.data) is str: - content = example.data - elif format == "yaml" and type(example.data) is list: - content = yaml.dump(example.data) - elif format == "yaml" and type(example.data) is str: - content = example.data - elif format == "csv": - content = example.data - logging.debug(f"Content: {content}") - f.write(content) - path = f"{tmp_dir}" + "/{model}." + format - delimiter = "array" - - server = Server( - type="local", - path=path, - format=format, - delimiter=delimiter, - ) - print(server) - run.log_info(f"Using {server} for testing the examples") - - # 5. check server is supported type - # 6. check server credentials are complete - if server.format == "json": - check_jsonschema(run, data_contract, server) - check_soda_execute(run, data_contract, server, self._spark) - - except DataContractException as e: - run.checks.append(Check( - type=e.type, - result=e.result, - name=e.name, - reason=e.reason, - engine=e.engine, - details="" - )) - run.log_error(str(e)) - - - run.finish() - - if self._publish_url is not None: - publish_datamesh_manager(run, self._publish_url) - - return run def diff(self, other): pass @@ -216,3 +154,35 @@ def export(self, export_format) -> str: else: print(f"Export format {export_format} not supported.") return "" + + def _get_examples_server(self, data_contract, run, tmp_dir): + run.log_info(f"Copying examples to files in temporary directory {tmp_dir}") + format = "json" + for example in data_contract.examples: + format = example.type + p = f"{tmp_dir}/{example.model}.{format}" + run.log_info(f"Creating example file {p}") + with open(p, "w") as f: + content = "" + if format == "json" and type(example.data) is list: + content = json.dumps(example.data) + elif format == "json" and type(example.data) is str: + content = example.data + elif format == "yaml" and type(example.data) is list: + content = yaml.dump(example.data) + elif format == "yaml" and type(example.data) is str: + content = example.data + elif format == "csv": + content = example.data + logging.debug(f"Content of example file {p}: {content}") + f.write(content) + path = f"{tmp_dir}" + "/{model}." + format + delimiter = "array" + server = Server( + type="local", + path=path, + format=format, + delimiter=delimiter, + ) + run.log_info(f"Using {server} for testing the examples") + return server diff --git a/tests/examples/local-json-simple/datacontract_csv.yaml b/tests/examples/examples/datacontract_csv.yaml similarity index 100% rename from tests/examples/local-json-simple/datacontract_csv.yaml rename to tests/examples/examples/datacontract_csv.yaml diff --git a/tests/examples/local-json-simple/datacontract_json_inline.yaml b/tests/examples/examples/datacontract_inline.yaml similarity index 92% rename from tests/examples/local-json-simple/datacontract_json_inline.yaml rename to tests/examples/examples/datacontract_inline.yaml index 3563fd0f..f0b3ae79 100644 --- a/tests/examples/local-json-simple/datacontract_json_inline.yaml +++ b/tests/examples/examples/datacontract_inline.yaml @@ -25,8 +25,8 @@ models: - "endgueltig" examples: - - type: json - description: Example entry for CPI data + - description: Example entry for CPI data + type: json # TODO should be inline model: verbraucherpreisindex data: - wert: 99 diff --git a/tests/examples/examples/datacontract_json.yaml b/tests/examples/examples/datacontract_json.yaml new file mode 100644 index 00000000..b73b2b24 --- /dev/null +++ b/tests/examples/examples/datacontract_json.yaml @@ -0,0 +1,35 @@ +dataContractSpecification: 0.9.2 +id: "61111-0002" +info: + title: "Verbraucherpreisindex: Deutschland, Monate" + description: A data contract for the distribution and use of the German Consumer Price Index data. + version: 1.0.0 + owner: my-domain-team +models: + verbraucherpreisindex: + description: Model representing the Consumer Price Index for Germany + fields: + wert: + description: Value of the Consumer Price Index + type: integer + required: true + jahrMonat: + description: Year and month of the data + type: string + required: true + qualitaet: + description: Quality of the data + type: string + enum: + - "vorlaeufig" + - "endgueltig" + +examples: + - type: json + description: Example entry for CPI data + model: verbraucherpreisindex + data: |- + [{ "wert": 99, "jahrMonat": "2022-00" }, + { "wert": 100, "jahrMonat": "2022-01" }, + { "wert": 101, "jahrMonat": "2022-02", "qualitaet": "vorlaeufig" }] + diff --git a/tests/test_examples_local_json_example.py b/tests/test_examples_examples_csv.py similarity index 64% rename from tests/test_examples_local_json_example.py rename to tests/test_examples_examples_csv.py index cbd73d9b..e47bf44f 100644 --- a/tests/test_examples_local_json_example.py +++ b/tests/test_examples_examples_csv.py @@ -11,13 +11,13 @@ logging.basicConfig(level=logging.DEBUG, force=True) def test_cli(): - result = runner.invoke(app, ["test", "--examples", "./examples/local-json-simple/datacontract_json_inline.yaml"]) + result = runner.invoke(app, ["test", "--examples", "./examples/examples/datacontract_csv.yaml"]) assert result.exit_code == 0 -def test_local_json(): - data_contract = DataContract(data_contract_file="examples/local-json-simple/datacontract_json_inline.yaml") - run = data_contract.testExample() +def test_csv(): + data_contract = DataContract(data_contract_file="examples/examples/datacontract_csv.yaml", examples=True) + run = data_contract.test() print(run) print(run.result) assert run.result == "passed" diff --git a/tests/test_examples_examples_inline.py b/tests/test_examples_examples_inline.py new file mode 100644 index 00000000..325a8ce8 --- /dev/null +++ b/tests/test_examples_examples_inline.py @@ -0,0 +1,25 @@ +import logging + +import pytest +from typer.testing import CliRunner + +from datacontract.cli import app +from datacontract.data_contract import DataContract + +runner = CliRunner() + +logging.basicConfig(level=logging.DEBUG, force=True) + +def test_cli(): + result = runner.invoke(app, ["test", "--examples", "./examples/examples/datacontract_inline.yaml"]) + assert result.exit_code == 0 + + +def test_json_inline(): + data_contract = DataContract(data_contract_file="examples/examples/datacontract_inline.yaml", examples=True) + run = data_contract.test() + print(run) + print(run.result) + assert run.result == "passed" + + diff --git a/tests/test_examples_local_json_example_csv.py b/tests/test_examples_examples_json.py similarity index 56% rename from tests/test_examples_local_json_example_csv.py rename to tests/test_examples_examples_json.py index ce626cc7..ace33729 100644 --- a/tests/test_examples_local_json_example_csv.py +++ b/tests/test_examples_examples_json.py @@ -1,3 +1,5 @@ +import logging + import pytest from typer.testing import CliRunner @@ -6,15 +8,16 @@ runner = CliRunner() +logging.basicConfig(level=logging.DEBUG, force=True) def test_cli(): - result = runner.invoke(app, ["test", "--examples", "./examples/local-json-simple/datacontract_csv.yaml"]) + result = runner.invoke(app, ["test", "--examples", "./examples/examples/datacontract_json.yaml"]) assert result.exit_code == 0 -def test_local_json(): - data_contract = DataContract(data_contract_file="examples/local-json-simple/datacontract_csv.yaml") - run = data_contract.testExample() +def test_json(): + data_contract = DataContract(data_contract_file="examples/examples/datacontract_json.yaml", examples=True) + run = data_contract.test() print(run) print(run.result) assert run.result == "passed" From 5eed62491509064ad52fdc3f23dffa3616a50a28 Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Wed, 14 Feb 2024 09:48:41 +0100 Subject: [PATCH 10/11] Add "test --examples" --- README.md | 3 +++ pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 781a9e7f..c3ebcb08 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,9 @@ $ datacontract lint datacontract.yaml # execute schema and quality checks $ datacontract test datacontract.yaml +# execute schema and quality checks on the examples within the contract +$ datacontract test --examples datacontract.yaml + # find differences between to data contracts (Coming Soon) $ datacontract diff datacontract-v1.yaml datacontract-v2.yaml diff --git a/pyproject.toml b/pyproject.toml index 956e076a..ecf25aec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "soda-core-spark[databricks]~=3.1.5", "soda-core-spark-df~=3.1.5", "snowflake-connector-python[pandas]>=3.6,<3.8", - "duckdb>=0.9.3.dev3920", + "duckdb==0.9.2", "fastjsonschema~=2.19.1", "python-dotenv~=1.0.0", "s3fs==2024.2.0", From 13eb776375e87c38d42a7aef85bffc58797111c7 Mon Sep 17 00:00:00 2001 From: Simon Harrer Date: Wed, 14 Feb 2024 10:00:07 +0100 Subject: [PATCH 11/11] Add "test --examples" --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ecf25aec..f1121c58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "soda-core-spark[databricks]~=3.1.5", "soda-core-spark-df~=3.1.5", "snowflake-connector-python[pandas]>=3.6,<3.8", - "duckdb==0.9.2", + "duckdb==0.9.3.dev3920", "fastjsonschema~=2.19.1", "python-dotenv~=1.0.0", "s3fs==2024.2.0",