From ff6abd1089bdf5ca82a35418747c6736518e719a Mon Sep 17 00:00:00 2001
From: James <james.baster@opendataservices.coop>
Date: Tue, 20 Dec 2022 11:31:12 +0000
Subject: [PATCH] actions: Test Codelists against schema

https://github.com/Open-Telecoms-Data/open-fibre-data-standard/issues/227
---
 requirements.in   |  1 +
 requirements.txt  |  3 ++-
 tests/test_csv.py | 43 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/requirements.in b/requirements.in
index 8870d5d..4d5e8d6 100644
--- a/requirements.in
+++ b/requirements.in
@@ -16,3 +16,4 @@ flattentool>=0.20
 pytest
 jscc
 mdformat
+jsonschema
diff --git a/requirements.txt b/requirements.txt
index e6a0152..5097fe0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with Python 3.9
 # by the following command:
 #
-#    pip-compile requirements.in
+#    pip-compile
 #
 alabaster==0.7.12
     # via sphinx
@@ -102,6 +102,7 @@ jsonref==1.0.1
     #   sphinxcontrib-opendataservices-jsonschema
 jsonschema==4.17.3
     # via
+    #   -r requirements.in
     #   jscc
     #   libcoveofds
 libcoveofds==0.5.0
diff --git a/tests/test_csv.py b/tests/test_csv.py
index b9243de..ea2c055 100644
--- a/tests/test_csv.py
+++ b/tests/test_csv.py
@@ -9,7 +9,8 @@
 from jscc.testing.checks import get_invalid_csv_files
 from jscc.testing.filesystem import walk_csv_data
 from jscc.testing.util import warn_and_assert
-
+from jsonschema import FormatChecker
+from jsonschema.validators import Draft4Validator as validator
 
 cwd = os.getcwd()
 
@@ -88,3 +89,43 @@ def test_valid():
             )
 
     assert errors == 0, "One or more codelist CSV files are invalid. See warnings below."
+
+def test_codelist():
+    """
+    Ensure every codelist CSV file is valid against schema/codelist-schema.json and has no duplicate codes.
+
+    organisationIdentifierScheme.csv is skipped: it comes from another source and has a different structure.
+    """
+    # Maps a CSV basename to the one validation error message tolerated for that file (none at present).
+    exceptions = {}
+
+    schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'schema', 'codelist-schema.json')
+    with open(schema_path, encoding='utf-8') as f:
+        codelist_schema = json.load(f)
+    # The schema is identical for every file, so build the validator once rather than per CSV.
+    codelist_validator = validator(codelist_schema, format_checker=FormatChecker())
+
+    any_errors = False
+    for path, name, text, fieldnames, rows in walk_csv_data():
+        if not is_codelist(fieldnames) or name == 'organisationIdentifierScheme.csv':
+            continue
+        codes_seen = set()
+        data = []
+        # Row numbers start at 2, presumably because row 1 of the CSV is the header (confirm against jscc).
+        for row_index, row in enumerate(rows, 2):
+            code = row['Code']
+            if code in codes_seen:
+                any_errors = True
+                warnings.warn(f'{path}: Duplicate code "{code}" on row {row_index}')
+            codes_seen.add(code)
+            # Empty cells become None (JSON null); Code is always kept as-is, even when empty.
+            data.append({k: (v if k == 'Code' or v else None) for k, v in row.items()})
+
+        for error in codelist_validator.iter_errors(data):
+            if error.message != exceptions.get(os.path.basename(path)):
+                any_errors = True
+                # Schema path entries may be integers (array indexes), which str.join() rejects.
+                schema_location = '/'.join(map(str, error.absolute_schema_path))
+                warnings.warn(f"{path}: {error.message} ({schema_location})\n")
+
+    assert not any_errors, "One or more codelist CSV files fail schema or duplicate-code checks. See warnings below."