From 06d9e5212a47a95404bdb480901bea13dfe4348a Mon Sep 17 00:00:00 2001 From: MikeG Date: Thu, 18 Apr 2024 11:31:01 +0200 Subject: [PATCH] add option to ignore datatypes while validating (#265) * teach the `bluepysnap validate-circuit` and `bluepysnap validate-simulation` the ability to `--ignore-datatype-errors` so that mismatches of datatypes to the specification are ignored --- CHANGELOG.rst | 1 + bluepysnap/circuit_validation.py | 26 +++++--- bluepysnap/cli.py | 54 ++++++++-------- bluepysnap/schemas/schemas.py | 23 +++---- bluepysnap/simulation_validation.py | 4 +- tests/test_schema_validation_simulation.py | 2 +- tests/test_schemas.py | 71 ++++++++++++++++------ 7 files changed, 112 insertions(+), 69 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 03292641..b68027b7 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,7 @@ Improvements - Added the possibility to query Edge IDs and Node IDs based on edge/node population type using query key ``population_type`` - the types conform to `node types `_ and `edge types `_ defined in the sonata specification +- teach the `bluepysnap validate-circuit` and `bluepysnap validate-simulation` the ability to `--ignore-datatype-errors` so that mismatches of datatypes to the specification are ignored Version v3.0.1 diff --git a/bluepysnap/circuit_validation.py b/bluepysnap/circuit_validation.py index 0040e412..c40443f7 100644 --- a/bluepysnap/circuit_validation.py +++ b/bluepysnap/circuit_validation.py @@ -481,7 +481,7 @@ def validate_edge_population(edges_file, name, nodes): return [] -def validate_edges_dict(edges_dict, nodes, skip_slow): +def validate_edges_dict(edges_dict, nodes, skip_slow, ignore_datatype_errors): """Validate an item in the "edges" list. Args: @@ -518,7 +518,9 @@ def _is_source_node_virtual(edges_dict, edge_population, nodes): virtual = False if pop_type == "chemical": virtual = _is_source_node_virtual(edges_dict, name, nodes) - errors += schemas.validate_edges_schema(edges_file, pop_type, virtual) + errors += schemas.validate_edges_schema( + edges_file, pop_type, virtual, ignore_datatype_errors + ) if not skip_slow: errors += validate_edge_population(edges_file, name, nodes) else: @@ -527,7 +529,7 @@ def _is_source_node_virtual(edges_dict, edge_population, nodes): return errors -def validate_nodes_dict(nodes_dict, components): +def validate_nodes_dict(nodes_dict, components, ignore_datatype_errors): """Validate an item in the "nodes" list. Args: @@ -544,7 +546,9 @@ def validate_nodes_dict(nodes_dict, components): nodes_file = nodes_dict["nodes_file"] if Path(nodes_file).is_file(): - errors = schemas.validate_nodes_schema(nodes_file, population["type"]) + errors = schemas.validate_nodes_schema( + nodes_file, population["type"], ignore_datatype_errors + ) errors += validate_node_population(nodes_file, population, pop_name) else: errors.append(BluepySnapValidationError.fatal(f'Invalid "nodes_file": {nodes_file}')) @@ -552,7 +556,7 @@ def validate_nodes_dict(nodes_dict, components): return errors -def validate_networks(config, skip_slow): +def validate_networks(config, skip_slow, ignore_datatype_errors): """Validate "networks" part of the config. Acts as a starting point of validation. @@ -566,15 +570,17 @@ def validate_networks(config, skip_slow): for nodes_dict in nodes: if "nodes_file" in nodes_dict: - errors += validate_nodes_dict(nodes_dict, components) + errors += validate_nodes_dict(nodes_dict, components, ignore_datatype_errors) for edges_dict in config["networks"].get("edges", []): if "edges_file" in edges_dict: - errors += validate_edges_dict(edges_dict, nodes, skip_slow) + errors += validate_edges_dict(edges_dict, nodes, skip_slow, ignore_datatype_errors) return errors -def validate(config_file, skip_slow, only_errors=False, print_errors=True): +def validate( + config_file, skip_slow, only_errors=False, print_errors=True, ignore_datatype_errors=False +): """Validates Sonata circuit. Args: @@ -587,10 +593,10 @@ def validate(config_file, skip_slow, only_errors=False, print_errors=True): set: set of errors, empty if no errors """ config = Parser.parse(load_json(config_file), str(Path(config_file).parent)) - errors = schemas.validate_circuit_schema(config_file, config) + errors = schemas.validate_circuit_schema(config_file, config, ignore_datatype_errors) if "networks" in config: - errors += validate_networks(config, skip_slow) + errors += validate_networks(config, skip_slow, ignore_datatype_errors) if _check_partial_circuit_config(config): message = ( diff --git a/bluepysnap/cli.py b/bluepysnap/cli.py index 669e8172..dc4e37cb 100644 --- a/bluepysnap/cli.py +++ b/bluepysnap/cli.py @@ -1,6 +1,5 @@ """The project's command line launcher.""" -import functools import logging import click @@ -22,45 +21,48 @@ def cli(verbose): ) -def circuit_validation_params(func): - """Small helper to have shared params.""" - - @click.argument("config_file", type=CLICK_EXISTING_FILE) - @click.option( - "--skip-slow/--no-skip-slow", - default=True, - help=( - "Skip slow checks; checking all edges refer to existing node ids, " - "edge indices are correct, etc" - ), - ) - @click.option("--only-errors", is_flag=True, help="Only print fatal errors (ignore warnings)") - @functools.wraps(func) - def wrapper(*args, **kwargs): - return func(*args, **kwargs) - - return wrapper - - @cli.command() -@circuit_validation_params -def validate_circuit(config_file, skip_slow, only_errors): +@click.argument("config_file", type=CLICK_EXISTING_FILE) +@click.option( + "--skip-slow/--no-skip-slow", + default=True, + help=( + "Skip slow checks; checking all edges refer to existing node ids, " + "edge indices are correct, etc" + ), +) +@click.option("--only-errors", is_flag=True, help="Only print fatal errors (ignore warnings)") +@click.option( + "--ignore-datatype-errors", + is_flag=True, + help="Ignore errors related to mismatch of datatypes: ie: float64 used instead of float32", +) +def validate_circuit(config_file, skip_slow, only_errors, ignore_datatype_errors): """Validate Sonata circuit based on config file. Args: config_file (str): path to Sonata circuit config file skip_slow (bool): skip slow tests only_errors (bool): only print fatal errors + ignore_datatype_errors (bool): ignore checks related to datatypes """ - circuit_validation.validate(config_file, skip_slow, only_errors) + circuit_validation.validate( + config_file, skip_slow, only_errors, ignore_datatype_errors=ignore_datatype_errors + ) @cli.command() @click.argument("config_file", type=CLICK_EXISTING_FILE) -def validate_simulation(config_file): +@click.option( + "--ignore-datatype-errors", + is_flag=True, + help="Ignore errors related to mismatch of datatypes: ie: float64 used instead of float32", +) +def validate_simulation(config_file, ignore_datatype_errors): """Validate Sonata simulation based on config file. Args: config_file (str): path to Sonata simulation config file + ignore_datatype_errors (bool): ignore checks related to datatypes """ - simulation_validation.validate(config_file) + simulation_validation.validate(config_file, ignore_datatype_errors=ignore_datatype_errors) diff --git a/bluepysnap/schemas/schemas.py b/bluepysnap/schemas/schemas.py index 1531b6aa..a9be0c66 100644 --- a/bluepysnap/schemas/schemas.py +++ b/bluepysnap/schemas/schemas.py @@ -35,7 +35,7 @@ def _parse_path(path, join_str): return join_str.join(error_path) -def _wrap_errors(filepath, schema_errors, join_str): +def _wrap_errors(filepath, schema_errors, join_str, ignore_datatype_errors): """Handles parsing of schema errors into more meaningful messages. Also wraps all the warngings and errors to single Error instances. @@ -55,8 +55,9 @@ def _wrap_errors(filepath, schema_errors, join_str): if not e.path: errors.append(e.message) elif e.path[-1] == "datatype": - path = _parse_path(list(e.path)[:-1], join_str) - warnings.append(f"incorrect datatype '{e.instance}' for '{path}': {e.message}") + if not ignore_datatype_errors: + path = _parse_path(list(e.path)[:-1], join_str) + warnings.append(f"incorrect datatype '{e.instance}' for '{path}': {e.message}") else: if e.schema_path[-1] in e.schema.get("messages", {}): path = _parse_path(e.path, join_str) @@ -165,7 +166,7 @@ def get_dataset_dtype(item): return properties -def validate_simulation_schema(path, config): +def validate_simulation_schema(path, config, ignore_datatype_errors): """Validates a simulation config against a schema. Args: @@ -177,10 +178,10 @@ def validate_simulation_schema(path, config): """ errors = _validate_schema_for_dict(_parse_schema("simulation"), config) - return _wrap_errors(path, errors, ".") + return _wrap_errors(path, errors, ".", ignore_datatype_errors) -def validate_circuit_schema(path, config): +def validate_circuit_schema(path, config, ignore_datatype_errors): """Validates a circuit config against a schema. Args: @@ -192,10 +193,10 @@ def validate_circuit_schema(path, config): """ errors = _validate_schema_for_dict(_parse_schema("circuit"), config) - return _wrap_errors(path, errors, ".") + return _wrap_errors(path, errors, ".", ignore_datatype_errors) -def validate_nodes_schema(path, nodes_type): +def validate_nodes_schema(path, nodes_type, ignore_datatype_errors): """Validates a nodes file against a schema. Args: @@ -210,10 +211,10 @@ def validate_nodes_schema(path, nodes_type): errors = _validate_schema_for_dict(_parse_schema("node", nodes_type), nodes_h5_dict) - return _wrap_errors(path, errors, "/") + return _wrap_errors(path, errors, "/", ignore_datatype_errors) -def validate_edges_schema(path, edges_type, virtual): +def validate_edges_schema(path, edges_type, virtual, ignore_datatype_errors): """Validates an edges file against a schema. Args: @@ -232,7 +233,7 @@ def validate_edges_schema(path, edges_type, virtual): errors = _validate_schema_for_dict(_parse_schema("edge", edges_type), edges_h5_dict) - return _wrap_errors(path, errors, "/") + return _wrap_errors(path, errors, "/", ignore_datatype_errors) def _resolve_types(resolver, types): diff --git a/bluepysnap/simulation_validation.py b/bluepysnap/simulation_validation.py index 63a23aa8..42e97960 100644 --- a/bluepysnap/simulation_validation.py +++ b/bluepysnap/simulation_validation.py @@ -491,7 +491,7 @@ def validate_config(config): return [error for section in sorted(VALIDATORS) for error in VALIDATORS[section](config)] -def validate(config_file, print_errors=True): +def validate(config_file, print_errors=True, ignore_datatype_errors=False): """Validate Sonata simulation config. Args: @@ -502,7 +502,7 @@ def validate(config_file, print_errors=True): set: set of errors, empty if no errors """ config = _parse_config(config_file) - errors = schemas.validate_simulation_schema(config_file, config) + errors = schemas.validate_simulation_schema(config_file, config, ignore_datatype_errors) config = _add_validation_parameters(config, config_file) errors += validate_config(config) diff --git a/tests/test_schema_validation_simulation.py b/tests/test_schema_validation_simulation.py index f985ac0b..235c23e9 100644 --- a/tests/test_schema_validation_simulation.py +++ b/tests/test_schema_validation_simulation.py @@ -75,7 +75,7 @@ def _validate(config): Schema mocked to not have to parse it from file for every validation. """ - return test_module.validate_circuit_schema(CONFIG_FILE, config) + return test_module.validate_circuit_schema(CONFIG_FILE, config, ignore_datatype_errors=False) def _remove_from_config(config, to_remove): diff --git a/tests/test_schemas.py b/tests/test_schemas.py index 71f05836..8fae437e 100644 --- a/tests/test_schemas.py +++ b/tests/test_schemas.py @@ -135,7 +135,7 @@ def test__get_h5_structure_as_dict_library_entries(tmp_path): def test_validate_config_ok(): config = str(TEST_DATA_DIR / "circuit_config.json") - res = test_module.validate_circuit_schema("fake_path", config) + res = test_module.validate_circuit_schema("fake_path", config, ignore_datatype_errors=False) assert len(res) == 0 @@ -155,7 +155,9 @@ def test_validate_config_ok_missing_optional_fields(to_remove): for key in to_remove[:-1]: c = c[key] del c[to_remove[-1]] - errors = test_module.validate_circuit_schema(str(config_copy_path), config) + errors = test_module.validate_circuit_schema( + str(config_copy_path), config, ignore_datatype_errors=False + ) assert len(errors) == 0 @@ -200,7 +202,9 @@ def test_validate_config_error(to_remove_list, expected): for key in to_remove[:-1]: c = c[key] del c[to_remove[-1]] - errors = test_module.validate_circuit_schema(str(config_copy_path), config) + errors = test_module.validate_circuit_schema( + str(config_copy_path), config, ignore_datatype_errors=False + ) assert len(errors) == 1 assert errors[0] == BluepySnapValidationError.fatal(f"{config_copy_path}:\n\t{expected}") @@ -208,14 +212,17 @@ def test_validate_config_error(to_remove_list, expected): def test_validate_nodes_ok(): errors = test_module.validate_nodes_schema( - str(TEST_DATA_DIR / "nodes_single_pop.h5"), "biophysical" + str(TEST_DATA_DIR / "nodes_single_pop.h5"), "biophysical", ignore_datatype_errors=False ) assert len(errors) == 0 def test_validate_edges_ok(): errors = test_module.validate_edges_schema( - str(TEST_DATA_DIR / "edges_single_pop.h5"), "chemical", virtual=False + str(TEST_DATA_DIR / "edges_single_pop.h5"), + "chemical", + virtual=False, + ignore_datatype_errors=False, ) assert len(errors) == 0 @@ -250,7 +257,9 @@ def test_validate_nodes_biophysical_missing_required(missing): nodes_file = circuit_copy_path / "nodes_single_pop.h5" with h5py.File(nodes_file, "r+") as h5f: del h5f[missing] - errors = test_module.validate_nodes_schema(str(nodes_file), "biophysical") + errors = test_module.validate_nodes_schema( + str(nodes_file), "biophysical", ignore_datatype_errors=False + ) assert len(errors) == 1 assert f"'{missing.split('/')[-1]}' is a required property" in errors[0].message @@ -301,7 +310,9 @@ def test_validate_edges_chemical_missing_required(missing): edges_file = circuit_copy_path / "edges_single_pop.h5" with h5py.File(edges_file, "r+") as h5f: del h5f[missing] - errors = test_module.validate_edges_schema(str(edges_file), "chemical", virtual=False) + errors = test_module.validate_edges_schema( + str(edges_file), "chemical", virtual=False, ignore_datatype_errors=False + ) assert len(errors) == 1 assert f"'{missing.split('/')[-1]}' is a required property" in errors[0].message @@ -311,7 +322,9 @@ def test_missing_edge_population(): edges_file = circuit_copy_path / "edges_single_pop.h5" with h5py.File(edges_file, "r+") as h5f: del h5f["edges/default"] - errors = test_module.validate_edges_schema(str(edges_file), "chemical", virtual=False) + errors = test_module.validate_edges_schema( + str(edges_file), "chemical", virtual=False, ignore_datatype_errors=False + ) assert len(errors) == 1 assert errors[0] == BluepySnapValidationError.fatal( f"{str(edges_file)}:\n\tedges: too few properties" @@ -323,7 +336,9 @@ def test_missing_node_population(): nodes_file = circuit_copy_path / "nodes_single_pop.h5" with h5py.File(nodes_file, "r+") as h5f: del h5f["nodes/default"] - errors = test_module.validate_nodes_schema(str(nodes_file), "biophysical") + errors = test_module.validate_nodes_schema( + str(nodes_file), "biophysical", ignore_datatype_errors=False + ) assert len(errors) == 1 assert errors[0] == BluepySnapValidationError.fatal( f"{str(nodes_file)}:\n\tnodes: too few properties" @@ -335,7 +350,9 @@ def test_2_edge_populations(): edges_file = circuit_copy_path / "edges_single_pop.h5" with h5py.File(edges_file, "r+") as h5f: h5f["edges/default2"] = h5f["edges/default"] - errors = test_module.validate_edges_schema(str(edges_file), "chemical", virtual=False) + errors = test_module.validate_edges_schema( + str(edges_file), "chemical", virtual=False, ignore_datatype_errors=False + ) assert len(errors) == 0 @@ -344,7 +361,9 @@ def test_2_node_populations(): nodes_file = circuit_copy_path / "nodes_single_pop.h5" with h5py.File(nodes_file, "r+") as h5f: h5f["nodes/default2"] = h5f["nodes/default"] - errors = test_module.validate_nodes_schema(str(nodes_file), "biophysical") + errors = test_module.validate_nodes_schema( + str(nodes_file), "biophysical", ignore_datatype_errors=False + ) assert len(errors) == 0 @@ -355,7 +374,9 @@ def test_virtual_edge_population_ok(): with h5py.File(edges_file, "r+") as h5f: for r in to_remove: del h5f[f"edges/default/0/{r}"] - errors = test_module.validate_edges_schema(str(edges_file), "chemical", virtual=True) + errors = test_module.validate_edges_schema( + str(edges_file), "chemical", virtual=True, ignore_datatype_errors=False + ) assert len(errors) == 0 @@ -363,8 +384,10 @@ def test_virtual_edge_population_error(): with copy_test_data() as (circuit_copy_path, _): edges_file = circuit_copy_path / "edges_single_pop.h5" with h5py.File(edges_file, "r+") as h5f: - del h5f[f"edges/default/0/afferent_center_x"] - errors = test_module.validate_edges_schema(str(edges_file), "chemical", virtual=True) + del h5f["edges/default/0/afferent_center_x"] + errors = test_module.validate_edges_schema( + str(edges_file), "chemical", virtual=True, ignore_datatype_errors=False + ) assert len(errors) == 1 assert "'afferent_center_x' is a required property" in errors[0].message @@ -376,7 +399,9 @@ def test_virtual_node_population_ok(): with h5py.File(nodes_file, "r+") as h5f: for r in to_remove: del h5f[f"nodes/default/0/{r}"] - errors = test_module.validate_nodes_schema(str(nodes_file), "virtual") + errors = test_module.validate_nodes_schema( + str(nodes_file), "virtual", ignore_datatype_errors=False + ) assert len(errors) == 0 @@ -385,7 +410,9 @@ def test_virtual_node_population_error(): nodes_file = circuit_copy_path / "nodes_single_pop.h5" with h5py.File(nodes_file, "r+") as h5f: del h5f["nodes/default/0/model_type"] - errors = test_module.validate_nodes_schema(str(nodes_file), "virtual") + errors = test_module.validate_nodes_schema( + str(nodes_file), "virtual", ignore_datatype_errors=False + ) assert len(errors) == 1 assert "'model_type' is a required property" in errors[0].message @@ -396,7 +423,9 @@ def test_validate_edges_missing_attributes_field(): edges_file = circuit_copy_path / "edges_single_pop.h5" with h5py.File(edges_file, "r+") as h5f: del h5f["edges/default/source_node_id"].attrs["node_population"] - errors = test_module.validate_edges_schema(str(edges_file), "chemical", virtual=False) + errors = test_module.validate_edges_schema( + str(edges_file), "chemical", virtual=False, ignore_datatype_errors=False + ) assert len(errors) == 1 assert "missing required attribute(s) ['node_population']" in errors[0].message @@ -408,7 +437,9 @@ def test_validate_edges_missing_attribute(): with h5py.File(edges_file, "r+") as h5f: del h5f["edges/default/source_node_id"].attrs["node_population"] h5f["edges/default/source_node_id"].attrs.create("population", "some_val") - errors = test_module.validate_edges_schema(str(edges_file), "chemical", virtual=False) + errors = test_module.validate_edges_schema( + str(edges_file), "chemical", virtual=False, ignore_datatype_errors=False + ) assert len(errors) == 1 assert "missing required attribute(s) ['node_population']" in errors[0].message @@ -428,7 +459,9 @@ def test_wrong_datatype(field): with h5py.File(edges_file, "r+") as h5f: del h5f[field] h5f.create_dataset(field, data=[0], dtype="i2") - errors = test_module.validate_edges_schema(str(edges_file), "chemical", virtual=False) + errors = test_module.validate_edges_schema( + str(edges_file), "chemical", virtual=False, ignore_datatype_errors=False + ) assert len(errors) == 1 assert errors[0].level == BluepySnapValidationError.WARNING assert f"incorrect datatype 'int16' for '{field}'" in errors[0].message