-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add parsing of data-validation yaml files (#366)
Co-authored-by: Philip Hackstock <[email protected]>
- Loading branch information
1 parent
ff55b90
commit 9867535
Showing
9 changed files
with
138 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from pathlib import Path | ||
from typing import List, Union | ||
|
||
import yaml | ||
|
||
from nomenclature import DataStructureDefinition | ||
from nomenclature.error import ErrorCollector | ||
from nomenclature.processor.iamc import IamcDataFilter | ||
from nomenclature.processor import Processor | ||
from nomenclature.processor.utils import get_relative_path | ||
|
||
|
||
class DataValidationCriteria(IamcDataFilter):
    """Data validation criteria.

    Extends ``IamcDataFilter`` with two optional numeric bounds. Both default
    to ``None``.
    """

    # Annotated as optional so that the ``None`` default (and an explicitly
    # passed ``None``) agrees with the declared type.
    # NOTE(review): presumably ``None`` means "no bound on this side" --
    # confirm once the bounds are actually evaluated.
    upper_bound: Union[float, None] = None
    lower_bound: Union[float, None] = None
|
||
|
||
class DataValidator(Processor):
    """Processor for validating IAMC datapoints.

    Holds a list of validation criteria together with the file they were
    read from.
    """

    criteria_items: List[DataValidationCriteria]
    file: Path

    @classmethod
    def from_file(cls, file: Union[Path, str]) -> "DataValidator":
        """Create a ``DataValidator`` from a yaml file.

        Parameters
        ----------
        file : Path or str
            Location of the yaml file. Its parsed content is passed on
            unchanged as ``criteria_items``.

        Returns
        -------
        DataValidator
            Instance built from the parsed content and ``file``.
        """
        # Decode explicitly as UTF-8 so that parsing does not depend on the
        # platform's default locale encoding.
        with open(file, "r", encoding="utf-8") as f:
            content = yaml.safe_load(f)
        return cls(file=file, criteria_items=content)

    def apply(self):
        """Do nothing; applying the criteria is not implemented here."""
        pass

    def validate_with_definition(self, dsd: DataStructureDefinition) -> None:
        """Check every criteria item against a ``DataStructureDefinition``.

        All items are checked before reporting, so one exception lists every
        failing item instead of only the first.

        Parameters
        ----------
        dsd : DataStructureDefinition
            Definition that each criteria item is validated against.

        Raises
        ------
        ValueError
            If at least one criteria item raised a ``ValueError``. The
            message names the file (as a relative path) followed by the
            collected errors.
        """
        errors = ErrorCollector()
        for data in self.criteria_items:
            try:
                data.validate_with_definition(dsd)
            except ValueError as value_error:
                # Collect rather than re-raise to keep checking other items.
                errors.append(value_error)
        if errors:
            raise ValueError(f"In file {get_relative_path(self.file)}:\n{errors}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
- Common: | ||
- World |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
- Final Energy: | ||
unit: EJ/yr | ||
- Primary Energy: | ||
unit: EJ/yr | ||
- Emissions|CO2: | ||
unit: Mt CO2/yr | ||
- Emissions|CH4: | ||
unit: Mt CH4/yr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
- variable: Final Energy | ||
region: Asia | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
- region: World | ||
variable: Final Energy | ||
year: 2010 | ||
upper_bound: 2.5 | ||
lower_bound: 1 | ||
|
5 changes: 5 additions & 0 deletions
5
tests/data/validation/validate_data/validation_unknown_region.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
- region: Asia | ||
variable: Final Energy | ||
year: 2010 | ||
upper_bound: 2.5 | ||
lower_bound: 1 |
5 changes: 5 additions & 0 deletions
5
tests/data/validation/validate_data/validation_unknown_variable.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
- region: World | ||
variable: Final Energy|Industry | ||
year: 2010 | ||
upper_bound: 2.5 | ||
lower_bound: 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import pytest | ||
from conftest import TEST_DATA_DIR | ||
|
||
from nomenclature import DataStructureDefinition | ||
from nomenclature.processor.data_validator import DataValidator | ||
|
||
DATA_VALIDATION_TEST_DIR = TEST_DATA_DIR / "validation" / "validate_data" | ||
|
||
|
||
def test_DataValidator_from_file():
    """Parsing the simple validation file yields the expected DataValidator."""
    yaml_file = DATA_VALIDATION_TEST_DIR / "simple_validation.yaml"

    expected_criteria = {
        "region": ["World"],
        "variable": "Final Energy",
        "year": [2010],
        "upper_bound": 2.5,
        "lower_bound": 1.0,  # test that integer in yaml is cast to float
    }
    exp = DataValidator(criteria_items=[expected_criteria], file=yaml_file)

    obs = DataValidator.from_file(yaml_file)
    assert obs == exp

    # the parsed criteria must also be consistent with the test definition
    definition = DataStructureDefinition(TEST_DATA_DIR / "validation" / "definition")
    assert obs.validate_with_definition(definition) is None
|
||
|
||
@pytest.mark.parametrize(
    "dimension, match",
    [
        ("region", r"regions.*not defined.*\n.*Asia"),
        ("variable", r"variables.*not defined.*\n.*Final Energy\|Industry"),
    ],
)
def test_DataValidator_validate_with_definition_raises(dimension, match):
    """Criteria using an undefined region or variable fail validation."""
    # Failure cases covered by the parametrization:
    # 1. Undefined region
    # 2. Undefined variable
    # TODO Undefined unit
    definition_dir = TEST_DATA_DIR / "validation" / "definition"
    validator = DataValidator.from_file(
        DATA_VALIDATION_TEST_DIR / f"validation_unknown_{dimension}.yaml"
    )

    # a definition holding all dimensions must reject the offending item
    full_dsd = DataStructureDefinition(definition_dir)
    with pytest.raises(ValueError, match=match):
        validator.validate_with_definition(full_dsd)

    # without the offending dimension there is nothing left to complain about
    other_dimensions = [
        name for name in ["region", "variable"] if name != dimension
    ]
    reduced_dsd = DataStructureDefinition(
        definition_dir,
        dimensions=other_dimensions,
    )
    assert validator.validate_with_definition(reduced_dsd) is None