From ff6abd1089bdf5ca82a35418747c6736518e719a Mon Sep 17 00:00:00 2001
From: James
Date: Tue, 20 Dec 2022 11:31:12 +0000
Subject: [PATCH] actions: Test Codelists against schema

https://github.com/Open-Telecoms-Data/open-fibre-data-standard/issues/227
---
 requirements.in   |  1 +
 requirements.txt  |  3 ++-
 tests/test_csv.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/requirements.in b/requirements.in
index 8870d5d..4d5e8d6 100644
--- a/requirements.in
+++ b/requirements.in
@@ -16,3 +16,4 @@ flattentool>=0.20
 pytest
 jscc
 mdformat
+jsonschema
diff --git a/requirements.txt b/requirements.txt
index e6a0152..5097fe0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with Python 3.9
 # by the following command:
 #
-#    pip-compile requirements.in
+#    pip-compile
 #
 alabaster==0.7.12
     # via sphinx
@@ -102,6 +102,7 @@ jsonref==1.0.1
     #   sphinxcontrib-opendataservices-jsonschema
 jsonschema==4.17.3
     # via
+    #   -r requirements.in
     #   jscc
     #   libcoveofds
 libcoveofds==0.5.0
diff --git a/tests/test_csv.py b/tests/test_csv.py
index b9243de..ea2c055 100644
--- a/tests/test_csv.py
+++ b/tests/test_csv.py
@@ -9,7 +9,8 @@
 from jscc.testing.checks import get_invalid_csv_files
 from jscc.testing.filesystem import walk_csv_data
 from jscc.testing.util import warn_and_assert
-
+from jsonschema import FormatChecker
+from jsonschema.validators import Draft4Validator as validator

 cwd = os.getcwd()

@@ -88,3 +89,50 @@ def test_valid():
     )

     assert errors == 0, "One or more codelist CSV files are invalid. See warnings below."
+
+
+def test_codelist():
+    """
+    Ensures all codelist files are valid against codelist-schema.json, and that no code is repeated within a file.
+    (Not organisationIdentifierScheme.csv - that comes from another source and has a different structure.)
+
+    Every problem is reported as a warning; the test fails at the end if any problem was reported.
+    """
+    # Maps a codelist's filename to the one validation error message to tolerate for that file.
+    exceptions = {
+    }
+
+    schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'schema', 'codelist-schema.json')
+    with open(schema_path, encoding='utf-8') as f:
+        codelist_schema = json.load(f)
+
+    # The validator depends only on the schema, so build it once rather than once per codelist.
+    codelist_validator = validator(codelist_schema, format_checker=FormatChecker())
+
+    any_errors = False
+
+    for path, name, text, fieldnames, rows in walk_csv_data():
+        if not is_codelist(fieldnames) or name == 'organisationIdentifierScheme.csv':
+            continue
+
+        codes_seen = set()
+        data = []
+        # Numbering starts at 2, presumably because row 1 of the CSV file is the header - confirm against jscc.
+        for row_index, row in enumerate(rows, 2):
+            code = row['Code']
+            if code in codes_seen:
+                any_errors = True
+                warnings.warn(f'{path}: Duplicate code "{code}" on row {row_index}')
+            codes_seen.add(code)
+
+            # Empty cells become None, except for Code, whose value is always kept as-is.
+            data.append({k: (v if (k == 'Code' or v) else None) for k, v in row.items()})
+
+        for error in codelist_validator.iter_errors(data):
+            if error.message != exceptions.get(os.path.basename(path)):
+                any_errors = True
+                # Schema paths can contain integer indexes (e.g. under anyOf), which str.join() rejects.
+                schema_location = '/'.join(map(str, error.absolute_schema_path))
+                warnings.warn(f"{path}: {error.message} ({schema_location})\n")
+
+    assert not any_errors