From 4667a278b039535cb1fe7b75f02948e5a4cbcfb6 Mon Sep 17 00:00:00 2001 From: David Almeida <58078834+dc-almeida@users.noreply.github.com> Date: Thu, 31 Oct 2024 15:45:02 +0100 Subject: [PATCH] Add dimensions option to CLI validate-scenarios (#421) * Add dimensions option to CLI validate-scenarios * Add dimensions from nomenclature.yaml to CLI validate-project * Update tests * Revert changes to validate-project testing * Revert default dimensions checks for CLI validate-scenarios and update tests --- nomenclature/cli.py | 22 ++++++++- .../structure_validation/nomenclature.yaml | 3 ++ tests/test_cli.py | 48 +++++++++++++++---- 3 files changed, 62 insertions(+), 11 deletions(-) create mode 100644 tests/data/cli/structure_validation/nomenclature.yaml diff --git a/nomenclature/cli.py b/nomenclature/cli.py index 18ef895d..1a0f45e5 100644 --- a/nomenclature/cli.py +++ b/nomenclature/cli.py @@ -287,19 +287,37 @@ def cli_run_workflow( type=click.Path(exists=True, path_type=Path), default="definitions", ) -def cli_validate_scenarios(input_file: Path, definitions: Path): +@click.option( + "--dimension", + "dimensions", + help="Optional list of dimensions", + type=str, + multiple=True, + default=None, +) +def cli_validate_scenarios(input_file: Path, definitions: Path, dimensions: List[str]): """Validate a scenario file against the codelists of a project + Example + ------- + $ nomenclature validate-scenarios <input-file> + --definitions <definition-folder> + --dimension <folder1> + --dimension <folder2> + --dimension <folder3> + Parameters ---------- input_file : Path Input data file, must be IAMC format, .xlsx or .csv definitions : Path Definitions folder with codelists, by default "definitions" + dimensions : List[str], optional + Dimensions to be checked, defaults to all sub-folders of `definitions` Raises ------ ValueError If input_file validation fails against specified codelist(s).
""" - DataStructureDefinition(definitions).validate(IamDataFrame(input_file)) + DataStructureDefinition(definitions, dimensions).validate(IamDataFrame(input_file)) diff --git a/tests/data/cli/structure_validation/nomenclature.yaml b/tests/data/cli/structure_validation/nomenclature.yaml new file mode 100644 index 00000000..83535f55 --- /dev/null +++ b/tests/data/cli/structure_validation/nomenclature.yaml @@ -0,0 +1,3 @@ +dimensions: + - region + - variable diff --git a/tests/test_cli.py b/tests/test_cli.py index 9eaee1ff..ce698579 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -423,10 +423,15 @@ def test_cli_run_workflow(tmp_path, simple_df): @pytest.mark.parametrize( - "status, unit, exit_code", [("valid", "EJ/yr", 0), ("invalid", "EJ", 1)] + "status, unit, dimensions, exit_code", + [ + ("valid_1", "EJ/yr", ["region", "variable"], 0), + ("invalid", "EJ", "variable", 1), + ("valid_2", "EJ", "region", 0), + ], ) -def test_cli_valid_scenarios(status, unit, exit_code, tmp_path): - """Check that CLI validates an IAMC dataset according to defined codelist.""" +def test_cli_valid_scenarios(status, unit, exit_code, dimensions, tmp_path): + """Check that CLI validates an IAMC dataset according to defined codelists.""" IamDataFrame( pd.DataFrame( [ @@ -435,17 +440,42 @@ def test_cli_valid_scenarios(status, unit, exit_code, tmp_path): columns=IAMC_IDX + [2005, 2010], ) ).to_excel(tmp_path / f"{status}_data.xlsx") + dimensions = [dimensions] if isinstance(dimensions, str) else dimensions + dimension_args = [] + for dim in dimensions: + dimension_args.append("--dimension") + dimension_args.append(dim) + result_valid = runner.invoke( cli, [ "validate-scenarios", str(tmp_path / f"{status}_data.xlsx"), "--definitions", - str( - MODULE_TEST_DATA_DIR - / "structure_validation_no_mappings" - / "definitions" - ), - ], + str(MODULE_TEST_DATA_DIR / "structure_validation" / "definitions"), + ] + + dimension_args, ) assert result_valid.exit_code == exit_code + + +def 
test_cli_valid_scenarios_implicit_dimensions(tmp_path): + """Check that CLI validates an IAMC dataset according to implicit dimensions codelists.""" + IamDataFrame( + pd.DataFrame( + [ + ["m_a", "s_a", "World", "Primary Energy", "EJ/yr", 1, 2], + ], + columns=IAMC_IDX + [2005, 2010], + ) + ).to_excel(tmp_path / "valid_data.xlsx") + result_valid = runner.invoke( + cli, + [ + "validate-scenarios", + str(tmp_path / "valid_data.xlsx"), + "--definitions", + str(MODULE_TEST_DATA_DIR / "structure_validation" / "definitions"), + ], + ) + assert result_valid.exit_code == 0