Skip to content

Commit

Permalink
Support a custom schema
Browse files Browse the repository at this point in the history
  • Loading branch information
jochenchrist committed Feb 21, 2024
1 parent d357019 commit 86f810b
Show file tree
Hide file tree
Showing 9 changed files with 734 additions and 19 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- Export to dbt models
- Export to dbt models (#37).
- test - show a test summary table.
- lint - Support local schema (#46).

## [0.9.4] - 2024-02-18

Expand Down
12 changes: 9 additions & 3 deletions datacontract/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def init(
try:
download_datacontract_file(location, template, overwrite)
except FileExistsException:
print("File already exists, use --overwrite-file to overwrite")
print("File already exists, use --overwrite to overwrite")
raise typer.Exit(code=1)
else:
print("📄 data contract written to " + location)
Expand All @@ -73,18 +73,22 @@ def init(
def lint(
    location: Annotated[
        str,
        typer.Argument(help="The location (url or path) of the data contract yaml."),
    ] = "datacontract.yaml",
    schema: Annotated[
        str,
        typer.Argument(help="The location (url or path) of the Data Contract Specification JSON Schema"),
    ] = "https://datacontract.com/datacontract.schema.json",
):
    """
    Validate that the datacontract.yaml is correctly formatted.
    """
    # Both the contract location and the schema location are handed to
    # DataContract; the lint run's outcome is then passed to _handle_result.
    contract = DataContract(data_contract_file=location, schema_location=schema)
    _handle_result(contract.lint())


@app.command()
def test(
location: Annotated[
str, typer.Argument(help="The location (url or path) of the data contract yaml.")] = "datacontract.yaml",
schema: Annotated[
str, typer.Argument(help="The location (url or path) of the Data Contract Specification JSON Schema")] = "https://datacontract.com/datacontract.schema.json",
server: Annotated[str, typer.Option(
help="The server configuration to run the schema and quality tests. "
"Use the key of the server object in the data contract yaml file "
Expand All @@ -101,7 +105,9 @@ def test(
Run schema and quality tests on configured servers.
"""
print(f"Testing {location}")
run = DataContract(data_contract_file=location, publish_url=publish, examples=examples).test()
if server == "all":
server = None
run = DataContract(data_contract_file=location, schema_location=schema, publish_url=publish, server=server, examples=examples).test()
if logs:
_print_logs(run)
_handle_result(run)
Expand Down
7 changes: 5 additions & 2 deletions datacontract/data_contract.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import logging
import tempfile

import yaml

from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import \
Expand Down Expand Up @@ -28,6 +29,7 @@ def __init__(
data_contract_file: str = None,
data_contract_str: str = None,
data_contract: DataContractSpecification = None,
schema_location: str = None,
server: str = None,
examples: bool = False,
publish_url: str = None,
Expand All @@ -36,6 +38,7 @@ def __init__(
self._data_contract_file = data_contract_file
self._data_contract_str = data_contract_str
self._data_contract = data_contract
self._schema_location = schema_location
self._server = server
self._examples = examples
self._publish_url = publish_url
Expand All @@ -46,7 +49,7 @@ def lint(self):
try:
run.log_info("Linting data contract")
data_contract = resolve.resolve_data_contract(self._data_contract_file, self._data_contract_str,
self._data_contract)
self._data_contract, self._schema_location)
run.checks.append(Check(
type="lint",
result="passed",
Expand Down Expand Up @@ -83,7 +86,7 @@ def test(self) -> Run:
try:
run.log_info(f"Testing data contract")
data_contract = resolve.resolve_data_contract(self._data_contract_file, self._data_contract_str,
self._data_contract)
self._data_contract, self._schema_location)

check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server)
# TODO check yaml contains models
Expand Down
17 changes: 9 additions & 8 deletions datacontract/lint/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ def resolve_data_contract(
data_contract_location: str = None,
data_contract_str: str = None,
data_contract: DataContractSpecification = None,
schema_location: str = None,
) -> DataContractSpecification:
if data_contract_location is not None:
return resolve_data_contract_from_location(data_contract_location)
return resolve_data_contract_from_location(data_contract_location, schema_location)
elif data_contract_str is not None:
return resolve_data_contract_from_str(data_contract_str)
return resolve_data_contract_from_str(data_contract_str, schema_location)
elif data_contract is not None:
return data_contract
else:
Expand All @@ -33,17 +34,17 @@ def resolve_data_contract(
)


def resolve_data_contract_from_location(location, schema_location: str = None) -> DataContractSpecification:
    """
    Read a data contract from a URL or a local path and resolve it.

    :param location: http(s) URL or file path of the data contract.
    :param schema_location: passed through unchanged to
        resolve_data_contract_from_str for validation.
    :return: the result of resolve_data_contract_from_str on the loaded text.
    """
    # http(s) locations go through fetch_resource, everything else is
    # treated as a file and read with read_file.
    is_remote = location.startswith("http://") or location.startswith("https://")
    load = fetch_resource if is_remote else read_file
    return resolve_data_contract_from_str(load(location), schema_location)


def resolve_data_contract_from_str(data_contract_str, schema_location: str = None):
    """
    Turn the textual data contract into a DataContractSpecification.

    The text is converted with to_yaml, checked with validate (which receives
    ``schema_location``), and the resulting mapping is expanded into the
    DataContractSpecification constructor as keyword arguments.
    """
    contract_fields = to_yaml(data_contract_str)
    validate(contract_fields, schema_location)
    return DataContractSpecification(**contract_fields)


Expand All @@ -62,8 +63,8 @@ def to_yaml(data_contract_str):
)


def validate(data_contract_yaml):
schema = fetch_schema()
def validate(data_contract_yaml, schema_location: str = None):
schema = fetch_schema(schema_location)
try:
fastjsonschema.validate(schema, data_contract_yaml)
logging.debug("YAML data is valid.")
Expand Down
29 changes: 25 additions & 4 deletions datacontract/lint/schema.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,28 @@
import json
import os

import requests

from datacontract.model.exceptions import DataContractException


def fetch_schema(location: str = None):
    """
    Load the Data Contract Specification JSON Schema.

    :param location: http(s) URL or local file path of the schema. When None,
        "https://datacontract.com/datacontract.schema.json" is used.
    :return: the decoded JSON document.
    :raises DataContractException: if ``location`` is a local path that does
        not exist.
    """
    if location is None:
        location = "https://datacontract.com/datacontract.schema.json"

    if location.startswith(("http://", "https://")):
        response = requests.get(location)
        return response.json()

    if not os.path.exists(location):
        # The message must be built from `location`: it is the only name in
        # this function that holds the schema path. Referencing any other
        # name here would raise NameError instead of the intended exception.
        raise DataContractException(
            type="lint",
            name=f"Reading schema from {location}",
            reason=f"The file '{location}' does not exist.",
            engine="datacontract",
            result="error"
        )

    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding.
    with open(location, 'r', encoding="utf-8") as file:
        return json.load(file)
Loading

0 comments on commit 86f810b

Please sign in to comment.