Skip to content

Commit

Permalink
actions: Test Codelists against schema
Browse files Browse the repository at this point in the history
  • Loading branch information
odscjames committed Dec 20, 2022
1 parent 93b2f65 commit ff6abd1
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 2 deletions.
1 change: 1 addition & 0 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ flattentool>=0.20
pytest
jscc
mdformat
jsonschema
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile requirements.in
# pip-compile
#
alabaster==0.7.12
# via sphinx
Expand Down Expand Up @@ -102,6 +102,7 @@ jsonref==1.0.1
# sphinxcontrib-opendataservices-jsonschema
jsonschema==4.17.3
# via
# -r requirements.in
# jscc
# libcoveofds
libcoveofds==0.5.0
Expand Down
43 changes: 42 additions & 1 deletion tests/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from jscc.testing.checks import get_invalid_csv_files
from jscc.testing.filesystem import walk_csv_data
from jscc.testing.util import warn_and_assert

from jsonschema import FormatChecker
from jsonschema.validators import Draft4Validator as validator

cwd = os.getcwd()

Expand Down Expand Up @@ -88,3 +89,43 @@ def test_valid():
)

assert errors == 0, "One or more codelist CSV files are invalid. See warnings below."

def test_codelist():
    """
    Ensure all codelist files are valid against codelist-schema.json.

    (Not organisationIdentifierScheme.csv - that comes from another source and has a different structure.)

    Each codelist CSV is also checked for duplicate values in its ``Code`` column. Every problem found is
    reported as a warning, and the test fails at the end if there was at least one.
    """
    # Maps a CSV file's basename to a schema error message that is ignored for that file.
    exceptions = {
    }

    schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'schema', 'codelist-schema.json')
    with open(schema_path, encoding='utf-8') as f:
        codelist_schema = json.load(f)

    # The schema is identical for every file, so build the validator once rather than per CSV.
    codelist_validator = validator(codelist_schema, format_checker=FormatChecker())

    any_errors = False

    for path, name, text, fieldnames, rows in walk_csv_data():
        if not is_codelist(fieldnames) or name == 'organisationIdentifierScheme.csv':
            continue

        codes_seen = set()
        data = []
        # Numbering starts at 2 - presumably because row 1 of the CSV is the header row.
        for row_index, row in enumerate(rows, 2):
            code = row['Code']
            if code in codes_seen:
                any_errors = True
                warnings.warn(f'{path}: Duplicate code "{code}" on row {row_index}')
            codes_seen.add(code)

            # Falsy cells become None; 'Code' is always passed through unchanged, so that an empty code is
            # validated as given rather than as a missing value.
            data.append({k: v if k == 'Code' or v else None for k, v in row.items()})

        ignored_message = exceptions.get(os.path.basename(path))
        for error in codelist_validator.iter_errors(data):
            if error.message != ignored_message:
                any_errors = True
                # Schema path elements can be integers (e.g. an index into anyOf), so convert before joining.
                schema_location = '/'.join(map(str, error.absolute_schema_path))
                warnings.warn(f"{path}: {error.message} ({schema_location})\n")

    assert not any_errors

0 comments on commit ff6abd1

Please sign in to comment.