Skip to content

Commit

Permalink
Add "test --examples"
Browse files Browse the repository at this point in the history
  • Loading branch information
simonharrer committed Feb 14, 2024
1 parent d21a145 commit 9899ac9
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 101 deletions.
8 changes: 1 addition & 7 deletions datacontract/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,7 @@ def test(
Run schema and quality tests on configured servers.
"""
print(f"Testing {location}")
if examples:
print(f"Using examples")
run = DataContract(data_contract_file=location).testExample()
_handle_result(run)
return

run = DataContract(data_contract_file=location, publish_url=publish).test()
run = DataContract(data_contract_file=location, publish_url=publish, examples=examples).test()
_handle_result(run)


Expand Down
138 changes: 54 additions & 84 deletions datacontract/data_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ def __init__(
data_contract_str: str = None,
data_contract: DataContractSpecification = None,
server: str = None,
examples: bool = False,
publish_url: str = None,
spark: str = None,
):
self._data_contract_file = data_contract_file
self._data_contract_str = data_contract_str
self._data_contract = data_contract
self._server = server
self._examples = examples
self._publish_url = publish_url
self._spark = spark

Expand Down Expand Up @@ -86,20 +88,27 @@ def test(self) -> Run:

check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server)
# TODO check yaml contains models
server_name = list(data_contract.servers.keys())[0]
server = data_contract.servers.get(server_name)
run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
run.dataContractId = data_contract.id
run.dataContractVersion = data_contract.info.version
run.dataProductId = server.dataProductId
run.outputPortId = server.outputPortId
run.server = server_name

# 5. check server is supported type
# 6. check server credentials are complete
if server.format == "json":
check_jsonschema(run, data_contract, server)
check_soda_execute(run, data_contract, server, self._spark)
with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir:
if self._examples:
server_name = "examples"
server = self._get_examples_server(data_contract, run, tmp_dir)
else:
server_name = list(data_contract.servers.keys())[0]
server = data_contract.servers.get(server_name)

run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
run.dataContractId = data_contract.id
run.dataContractVersion = data_contract.info.version
run.dataProductId = server.dataProductId
run.outputPortId = server.outputPortId
run.server = server_name

# 5. check server is supported type
# 6. check server credentials are complete
if server.format == "json":
check_jsonschema(run, data_contract, server)
check_soda_execute(run, data_contract, server, self._spark)

except DataContractException as e:
run.checks.append(Check(
Expand Down Expand Up @@ -129,77 +138,6 @@ def test(self) -> Run:

return run

def testExample(self) -> Run:
    """Run the schema and quality checks against the contract's inline examples.

    The data contract is resolved from the file, string or object given to the
    constructor. Its examples are written to a temporary directory by
    ``_get_examples_server`` and the checks are executed against the resulting
    local server. A ``DataContractException`` raised along the way is recorded
    as a check on the run instead of propagating.

    Returns:
        The finished ``Run``. It is also published when a publish URL was
        configured on this instance.
    """
    run = Run.create_run()
    try:
        run.log_info("Testing data contract")
        data_contract = resolve.resolve_data_contract(
            self._data_contract_file,
            self._data_contract_str,
            self._data_contract,
        )

        # TODO check yaml contains models
        run.log_info(
            f"Running tests for data contract {data_contract.id} "
            f"({data_contract.info.version}) against examples"
        )
        run.dataContractId = data_contract.id
        run.dataContractVersion = data_contract.info.version
        run.server = "examples"

        # The checks read the example files, so they have to run while the
        # temporary directory still exists.
        with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir:
            # Shared helper: writes the example files and builds the local
            # server description (no debug print to stdout).
            server = self._get_examples_server(data_contract, run, tmp_dir)

            # 5. check server is supported type
            # 6. check server credentials are complete
            if server.format == "json":
                check_jsonschema(run, data_contract, server)
            check_soda_execute(run, data_contract, server, self._spark)

    except DataContractException as e:
        # Surface the failure as a check result so the caller gets a report.
        run.checks.append(Check(
            type=e.type,
            result=e.result,
            name=e.name,
            reason=e.reason,
            engine=e.engine,
            details="",
        ))
        run.log_error(str(e))

    run.finish()

    if self._publish_url is not None:
        publish_datamesh_manager(run, self._publish_url)

    return run

def diff(self, other):
pass
Expand All @@ -216,3 +154,35 @@ def export(self, export_format) -> str:
else:
print(f"Export format {export_format} not supported.")
return ""

def _get_examples_server(self, data_contract, run, tmp_dir):
    """Write the contract's examples to files and describe them as a local server.

    Every entry of ``data_contract.examples`` is written to
    ``<tmp_dir>/<example.model>.<example.type>``. List data is serialized for
    the ``json`` and ``yaml`` types; string data is written unchanged for
    ``json``, ``yaml`` and ``csv``. Any other combination is logged as a
    warning and results in an empty file.

    Args:
        data_contract: Resolved data contract whose ``examples`` are written.
        run: Current run; only used for info logging here.
        tmp_dir: Existing directory that receives the example files.

    Returns:
        A ``Server`` of type ``"local"`` whose ``path`` is the template
        ``<tmp_dir>/{model}.<format>``, where ``<format>`` is the type of the
        last example (``"json"`` when there are no examples).
    """
    run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
    # Fallback used for the server description when no example is declared.
    example_format = "json"
    for example in data_contract.examples:
        example_format = example.type
        example_path = f"{tmp_dir}/{example.model}.{example_format}"
        run.log_info(f"Creating example file {example_path}")

        data = example.data
        if example_format == "json" and isinstance(data, list):
            content = json.dumps(data)
        elif example_format == "yaml" and isinstance(data, list):
            content = yaml.dump(data)
        elif example_format in ("json", "yaml", "csv") and isinstance(data, str):
            # Inline text is assumed to already be in the declared format.
            content = data
        else:
            # Keep the best-effort behavior (empty file) but make it visible.
            logging.warning(
                "Example for model %s has unsupported type %r or data of type %s; "
                "writing an empty file",
                example.model,
                example_format,
                type(data).__name__,
            )
            content = ""

        logging.debug("Content of example file %s: %s", example_path, content)
        # Explicit encoding so non-ASCII example data does not depend on the
        # platform's default encoding.
        with open(example_path, "w", encoding="utf-8") as f:
            f.write(content)

    # "{model}" stays a literal placeholder in the path.
    # NOTE(review): presumably substituted per model by the checks - confirm.
    path = f"{tmp_dir}/{{model}}.{example_format}"
    delimiter = "array"
    server = Server(
        type="local",
        path=path,
        format=example_format,
        delimiter=delimiter,
    )
    run.log_info(f"Using {server} for testing the examples")
    return server
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ models:
- "endgueltig"

examples:
- type: json
description: Example entry for CPI data
- description: Example entry for CPI data
type: json # TODO should be inline
model: verbraucherpreisindex
data:
- wert: 99
Expand Down
35 changes: 35 additions & 0 deletions tests/examples/examples/datacontract_json.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
dataContractSpecification: 0.9.2
id: "61111-0002"
info:
title: "Verbraucherpreisindex: Deutschland, Monate"
description: A data contract for the distribution and use of the German Consumer Price Index data.
version: 1.0.0
owner: my-domain-team
models:
verbraucherpreisindex:
description: Model representing the Consumer Price Index for Germany
fields:
wert:
description: Value of the Consumer Price Index
type: integer
required: true
jahrMonat:
description: Year and month of the data
type: string
required: true
qualitaet:
description: Quality of the data
type: string
enum:
- "vorlaeufig"
- "endgueltig"

examples:
- type: json
description: Example entry for CPI data
model: verbraucherpreisindex
data: |-
[{ "wert": 99, "jahrMonat": "2022-00" },
{ "wert": 100, "jahrMonat": "2022-01" },
{ "wert": 101, "jahrMonat": "2022-02", "qualitaet": "vorlaeufig" }]
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
logging.basicConfig(level=logging.DEBUG, force=True)

def test_cli():
result = runner.invoke(app, ["test", "--examples", "./examples/local-json-simple/datacontract_json_inline.yaml"])
result = runner.invoke(app, ["test", "--examples", "./examples/examples/datacontract_csv.yaml"])
assert result.exit_code == 0


def test_local_json():
data_contract = DataContract(data_contract_file="examples/local-json-simple/datacontract_json_inline.yaml")
run = data_contract.testExample()
def test_csv():
data_contract = DataContract(data_contract_file="examples/examples/datacontract_csv.yaml", examples=True)
run = data_contract.test()
print(run)
print(run.result)
assert run.result == "passed"
Expand Down
25 changes: 25 additions & 0 deletions tests/test_examples_examples_inline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import logging

import pytest
from typer.testing import CliRunner

from datacontract.cli import app
from datacontract.data_contract import DataContract

# Typer test runner used by the tests below to invoke the CLI app.
runner = CliRunner()

# Log at DEBUG level during the tests; force=True replaces any handlers that
# were already attached to the root logger.
logging.basicConfig(level=logging.DEBUG, force=True)

def test_cli():
    """Invoke ``test --examples`` on the inline example contract and expect exit code 0."""
    cli_args = ["test", "--examples", "./examples/examples/datacontract_inline.yaml"]
    result = runner.invoke(app, cli_args)
    assert result.exit_code == 0


def test_json_inline():
    """Test the inline example contract through the Python API and expect it to pass."""
    contract_file = "examples/examples/datacontract_inline.yaml"
    run = DataContract(data_contract_file=contract_file, examples=True).test()
    # Printed so the run details show up in the captured output on failure.
    print(run)
    print(run.result)
    assert run.result == "passed"


Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging

import pytest
from typer.testing import CliRunner

Expand All @@ -6,15 +8,16 @@

runner = CliRunner()

logging.basicConfig(level=logging.DEBUG, force=True)

def test_cli():
result = runner.invoke(app, ["test", "--examples", "./examples/local-json-simple/datacontract_csv.yaml"])
result = runner.invoke(app, ["test", "--examples", "./examples/examples/datacontract_json.yaml"])
assert result.exit_code == 0


def test_local_json():
data_contract = DataContract(data_contract_file="examples/local-json-simple/datacontract_csv.yaml")
run = data_contract.testExample()
def test_json():
data_contract = DataContract(data_contract_file="examples/examples/datacontract_json.yaml", examples=True)
run = data_contract.test()
print(run)
print(run.result)
assert run.result == "passed"
Expand Down

0 comments on commit 9899ac9

Please sign in to comment.