Skip to content

Commit

Permalink
Merge pull request #1 from DSD-DBS/validation
Browse files Browse the repository at this point in the history
Validation for Onthology
  • Loading branch information
tklockau authored Oct 20, 2023
2 parents 69c1b33 + 578bbfb commit a089bd0
Show file tree
Hide file tree
Showing 29 changed files with 1,640 additions and 3 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ classifiers = [
dependencies = [
"jsonschema>=4.4.0",
"fastjsonschema>=2.16.2",
"raillabel>=3.1.0"
"raillabel>=3.1.0",
"pyyaml>=6.0.0"
]

[project.urls]
Expand Down
1 change: 1 addition & 0 deletions raillabel_providerkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from . import format
from .convert import loader_classes
from .convert.convert import convert
from .validation.validate import validate

try:
__version__ = metadata.version("raillabel-providerkit")
Expand Down
6 changes: 6 additions & 0 deletions raillabel_providerkit/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,9 @@ class SchemaError(Exception):
"""Raised when the data does not validate against a given schema."""

__module__ = "raillabel_providerkit"


class OnthologySchemaError(Exception):
    """Signals that a supplied onthology .yaml-file does not conform to the onthology schema."""

    # Report the exception as belonging to the top-level package rather than this submodule.
    __module__ = "raillabel_providerkit"
2 changes: 1 addition & 1 deletion raillabel_providerkit/format/understand_ai/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def to_raillabel(self) -> dict:

def _get_subschema_version(self) -> str:
RAILLABEL_SCHEMA_PATH = (
Path(__file__).parent.parent.parent / "validate" / "raillabel_schema.json"
Path(__file__).parent.parent.parent / "format" / "raillabel_schema.json"
)

with RAILLABEL_SCHEMA_PATH.open() as schema_file:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
"""Package for validating raillabel data regarding the format requirements."""

from .validate_onthology.validate_onthology import validate_onthology
35 changes: 35 additions & 0 deletions raillabel_providerkit/validation/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from pathlib import Path

import raillabel

from . import validate_onthology


def validate(scene: raillabel.Scene, onthology: t.Union[dict, Path]) -> t.List[str]:
    """Validate a scene based on the Deutsche Bahn Requirements.

    Both arguments are handed unchanged to ``validate_onthology``; its findings make up the
    returned list.

    Parameters
    ----------
    scene : raillabel.Scene
        The scene containing the annotations.
    onthology : dict or Path
        Onthology YAML-data or file containing information about all classes and their
        attributes. The onthology must adhere to the onthology_schema. If a path is provided,
        the file is loaded as a YAML.

    Returns
    -------
    list[str]
        All requirement errors found in the scene. An empty list means that no errors are
        present and the scene is valid.
    """
    found_errors: t.List[str] = []

    # Each validation step contributes its messages to the shared result list.
    found_errors.extend(validate_onthology(scene, onthology))

    return found_errors
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
"""Package for validating a scene via an onthology."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import abc
import typing as t
from dataclasses import dataclass
from importlib import import_module
from inspect import isclass
from pathlib import Path
from pkgutil import iter_modules


@dataclass
class _Attribute(abc.ABC):
    """Abstract base class for the attribute types an onthology can declare.

    Concrete subclasses decide via ``supports`` whether they handle a given attribute
    definition, are built from that definition via ``fromdict`` and validate actual
    annotation values via ``check``.
    """

    @classmethod
    @abc.abstractmethod
    def supports(cls, data_dict: t.Union[str, dict]) -> bool:
        """Return whether this class handles the given attribute definition.

        Parameters
        ----------
        data_dict : str or dict
            Attribute definition taken from the onthology. Despite the parameter name,
            implementations also receive plain type-name strings (e.g. "boolean").
        """
        raise NotImplementedError

    @classmethod
    @abc.abstractmethod
    def fromdict(cls, data_dict: t.Union[str, dict]) -> "_Attribute":
        """Create an instance of the attribute type from its onthology definition.

        Parameters
        ----------
        data_dict : str or dict
            Attribute definition for which ``supports`` returned True.

        Returns
        -------
        _Attribute
            The instance describing the attribute (an object, not a class).
        """
        raise NotImplementedError

    @abc.abstractmethod
    def check(
        self, attribute_name: str, attribute_value: t.Any, annotation_id: str
    ) -> t.List[str]:
        """Validate a single attribute value of an annotation.

        Parameters
        ----------
        attribute_name : str
            Name of the attribute, used in the error messages.
        attribute_value
            Value found in the annotation.
        annotation_id : str
            Identifier of the annotation, used in the error messages.

        Returns
        -------
        list[str]
            Error messages for the value. Empty if the value is valid.
        """
        raise NotImplementedError


def attribute_classes() -> t.List[t.Type[_Attribute]]:
    """Return the list of collected _Attribute child classes.

    The module-level ``ATTRIBUTE_CLASSES`` list itself is returned, not a copy.
    """
    return ATTRIBUTE_CLASSES


def _collect_attribute_classes():
    """Import all modules next to this file and register their _Attribute child classes.

    Every class that is a strict subclass of ``_Attribute`` and is reachable as a module-level
    name in one of those modules is appended to ``ATTRIBUTE_CLASSES``.

    NOTE(review): a module that is still in the middle of being imported when this runs only
    exposes the names it has defined so far - confirm that this module is always imported
    before the individual attribute modules.
    """
    package_dir = str(Path(__file__).resolve().parent)

    for _, module_name, _ in iter_modules([package_dir]):
        # Resolve the sibling relative to this module's own package instead of spelling out
        # the absolute dotted path, so discovery survives a move or rename of the package.
        module = import_module(f".{module_name}", package=__package__)

        # dir() yields the names in sorted order, which keeps the registration order stable.
        for member_name in dir(module):
            member = getattr(module, member_name)

            if not (isclass(member) and issubclass(member, _Attribute)):
                continue

            # Skip the abstract base itself and classes re-exported by several modules.
            if member is _Attribute or member in ATTRIBUTE_CLASSES:
                continue

            ATTRIBUTE_CLASSES.append(member)


# Registry of the _Attribute child classes found in this package. It has to be created before
# the collector is called, because the collector appends to this list.
ATTRIBUTE_CLASSES = []
# Fill the registry once, at import time of this module.
_collect_attribute_classes()
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _BooleanAttribute(_Attribute):
    """Attribute type for values that have to be of the exact type ``bool``."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the attribute definition is the type name "boolean"."""
        return data_dict == "boolean"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute. The definition carries no further settings."""
        return _BooleanAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not a bool, otherwise an empty list."""
        if type(attribute_value) is bool:
            return []

        actual_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{actual_type}' (should be 'bool')."
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _IntegerAttribute(_Attribute):
    """Attribute type for values that have to be of the exact type ``int``."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the attribute definition is the type name "integer"."""
        return data_dict == "integer"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute. The definition carries no further settings."""
        return _IntegerAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not an int, otherwise an empty list."""
        # Exact type comparison on purpose: bool is a subclass of int and must not pass.
        if type(attribute_value) is int:
            return []

        actual_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{actual_type}' (should be 'int')."
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _MultiSelectAttribute(_Attribute):
    """Attribute type for lists whose elements must all be taken from a fixed set of options."""

    # Values that may appear in the attribute's list.
    options: t.Set[str]

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the definition is a dict whose "type" entry is "multi-select"."""
        if type(data_dict) is not dict:
            return False

        return "type" in data_dict and data_dict["type"] == "multi-select"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute from the "options" entry of its definition."""
        return _MultiSelectAttribute(options=set(data_dict["options"]))

    def check(self, attribute_name: str, attribute_values, annotation_id: str) -> t.List[str]:
        """Check that the value is a list containing only defined options.

        Parameters
        ----------
        attribute_name : str
            Name of the attribute, used in the error message.
        attribute_values
            Value found in the annotation. Has to be a list.
        annotation_id : str
            Identifier of the annotation, used in the error message.

        Returns
        -------
        list[str]
            Empty if the value is valid. Otherwise a single message describing either the
            wrong type or the first element that is not a defined option.
        """
        if type(attribute_values) is not list:
            return [
                f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
                f"'{attribute_values.__class__.__name__}' (should be 'list')."
            ]

        for attribute_value in attribute_values:
            if not self._is_defined_option(attribute_value):
                return [
                    f"Attribute '{attribute_name}' of annotation {annotation_id} has an undefined "
                    f"value '{attribute_value}' (defined options: {self._stringify_options()})."
                ]

        return []

    def _is_defined_option(self, value) -> bool:
        """Return whether the value is one of the options, tolerating unhashable values."""
        try:
            return value in self.options
        except TypeError:
            # Unhashable elements (e.g. a nested list or dict) cannot be looked up in a set.
            # They can never be a defined option, so report them instead of crashing.
            return False

    def _stringify_options(self) -> str:
        """Return the options sorted, each in single quotes, separated by ", "."""
        return ", ".join(f"'{option}'" for option in sorted(self.options))
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _SingleSelectAttribute(_Attribute):
    """Attribute type for strings that must be one of a fixed set of options."""

    # Values the attribute is allowed to take.
    options: t.Set[str]

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the definition is a dict whose "type" entry is "single-select"."""
        if type(data_dict) is not dict:
            return False

        return "type" in data_dict and data_dict["type"] == "single-select"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute from the "options" entry of its definition."""
        return _SingleSelectAttribute(options=set(data_dict["options"]))

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return one error message for a non-str or undefined value, otherwise an empty list."""
        if type(attribute_value) is not str:
            actual_type = attribute_value.__class__.__name__
            return [
                f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
                f"'{actual_type}' (should be 'str')."
            ]

        if attribute_value in self.options:
            return []

        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} has an undefined "
            f"value '{attribute_value}' (defined options: {self._stringify_options()})."
        ]

    def _stringify_options(self) -> str:
        """Return the options sorted, each in single quotes, separated by ", "."""
        quoted_options = [f"'{option}'" for option in sorted(self.options)]
        return ", ".join(quoted_options)
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _StringAttribute(_Attribute):
    """Attribute type for values that have to be of the exact type ``str``."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the attribute definition is the type name "string"."""
        return data_dict == "string"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute. The definition carries no further settings."""
        return _StringAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not a str, otherwise an empty list."""
        if type(attribute_value) is str:
            return []

        actual_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{actual_type}' (should be 'str')."
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _VectorAttribute(_Attribute):
    """Attribute type for values that have to be of the exact type ``list``."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the attribute definition is the type name "vector"."""
        return data_dict == "vector"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute. The definition carries no further settings."""
        return _VectorAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not a list, otherwise an empty list."""
        # Only the container type is verified here; the elements are not inspected.
        if type(attribute_value) is list:
            return []

        actual_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{actual_type}' (should be 'list')."
        ]
Loading

0 comments on commit a089bd0

Please sign in to comment.