Skip to content

Commit

Permalink
feat: onthology validation
Browse files Browse the repository at this point in the history
  • Loading branch information
tklockau committed Oct 20, 2023
1 parent dcc0cc2 commit 74527c1
Show file tree
Hide file tree
Showing 18 changed files with 1,250 additions and 1 deletion.
6 changes: 6 additions & 0 deletions raillabel_providerkit/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,9 @@ class SchemaError(Exception):
"""Raised when the data does not validate against a given schema."""

__module__ = "raillabel_providerkit"


class OnthologySchemaError(Exception):
    """Signal that a provided ontology .yaml file does not validate against the schema."""

    # Present the exception under the top-level package name instead of this
    # submodule, e.g. in tracebacks and in the class' qualified name.
    __module__ = "raillabel_providerkit"
2 changes: 2 additions & 0 deletions raillabel_providerkit/validation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
"""Package for validating raillabel data regarding the format requirements."""

from .validate_onthology.validate_onthology import validate_onthology
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
"""Package for validating a scene via an ontology."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import abc
import typing as t
from dataclasses import dataclass
from importlib import import_module
from inspect import isclass
from pathlib import Path
from pkgutil import iter_modules


@dataclass
class _Attribute(abc.ABC):
    """Abstract base class for the attribute type definitions of an ontology.

    A concrete subclass represents one kind of attribute definition. It has
    to be able to recognise its own definition (``supports``), build itself
    from that definition (``fromdict``) and validate a concrete attribute
    value against it (``check``).
    """

    # NOTE: ``abc.abstractclassmethod`` is deprecated since Python 3.3; the
    # supported spelling is ``@classmethod`` stacked on ``@abc.abstractmethod``.
    @classmethod
    @abc.abstractmethod
    def supports(cls, data_dict: dict) -> bool:
        """Return whether this class can represent the given attribute definition.

        ``data_dict`` is the raw definition of a single attribute.
        """
        raise NotImplementedError

    @classmethod
    @abc.abstractmethod
    def fromdict(cls, data_dict: dict) -> "_Attribute":
        """Build an instance of the attribute from its raw definition."""
        raise NotImplementedError

    @abc.abstractmethod
    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Validate a single attribute value and return the found errors.

        ``attribute_name`` and ``annotation_id`` identify the checked value
        (e.g. for use in the error messages). The result is a list of error
        strings.
        """
        raise NotImplementedError


def attribute_classes() -> list[t.Type[_Attribute]]:
    """Return the list of collected ``_Attribute`` child classes.

    The returned object is the module-level ``ATTRIBUTE_CLASSES`` list itself,
    not a copy of it.
    """
    return ATTRIBUTE_CLASSES


def _collect_attribute_classes():
    """Collect the ``_Attribute`` child classes and store them in ``ATTRIBUTE_CLASSES``.

    Every module located in the directory of this file is imported and
    searched for classes deriving from ``_Attribute``. The base class itself
    is skipped. A class is stored only once, even if it is reachable through
    several modules or if this function is called repeatedly.
    """

    global ATTRIBUTE_CLASSES

    package_dir = str(Path(__file__).resolve().parent)
    for _, module_name, _ in iter_modules([package_dir]):

        module = import_module(
            f"raillabel_providerkit.validation.validate_onthology._onthology_classes._attributes.{module_name}"
        )
        for class_name in dir(module):
            class_ = getattr(module, class_name)

            if not (isclass(class_) and issubclass(class_, _Attribute)):
                continue

            # Skip the abstract base and anything that is already registered,
            # so the registry never contains duplicates.
            if class_ is _Attribute or class_ in ATTRIBUTE_CLASSES:
                continue

            ATTRIBUTE_CLASSES.append(class_)


# Registry of the _Attribute child classes. It must be created before
# _collect_attribute_classes() runs, because that function appends to it.
ATTRIBUTE_CLASSES = []
# Fill the registry once when this module is imported.
_collect_attribute_classes()
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _BooleanAttribute(_Attribute):
    """Attribute definition whose values have to be of type ``bool``."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the definition is the plain string ``"boolean"``.

        Despite the parameter name, no dictionary is expected here.
        """
        return data_dict == "boolean"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute; the definition carries no further settings."""
        return _BooleanAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not exactly a ``bool``."""
        if type(attribute_value) is bool:
            return []

        found_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{found_type}' (should be 'bool')."
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _IntegerAttribute(_Attribute):
    """Attribute definition whose values have to be of type ``int``."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the definition is the plain string ``"integer"``.

        Despite the parameter name, no dictionary is expected here.
        """
        return data_dict == "integer"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute; the definition carries no further settings."""
        return _IntegerAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not exactly an ``int``."""
        # Exact type comparison on purpose: an isinstance() test would accept
        # bool values, because bool is a subclass of int.
        if type(attribute_value) is int:
            return []

        found_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{found_type}' (should be 'int')."
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _MultiSelectAttribute(_Attribute):
    """Attribute definition whose values are lists of predefined options."""

    # All values that may appear inside the attribute's list.
    options: set[str]

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the definition is a dict with ``type == "multi-select"``."""
        return type(data_dict) is dict and data_dict.get("type") == "multi-select"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute from the ``"options"`` entry of the definition."""
        return _MultiSelectAttribute(options=set(data_dict["options"]))

    def check(self, attribute_name: str, attribute_values, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is no list of defined options.

        Only the first undefined entry of the list is reported.
        """
        if type(attribute_values) is not list:
            return [
                f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
                f"'{attribute_values.__class__.__name__}' (should be 'list')."
            ]

        for attribute_value in attribute_values:
            if not self._is_defined_option(attribute_value):
                return [
                    f"Attribute '{attribute_name}' of annotation {annotation_id} has an undefined "
                    f"value '{attribute_value}' (defined options: {self._stringify_options()})."
                ]

        return []

    def _is_defined_option(self, value) -> bool:
        """Return whether ``value`` is one of the defined options."""
        try:
            return value in self.options
        except TypeError:
            # Unhashable entries (e.g. nested lists or dicts) cannot be looked
            # up in a set. They are invalid data and must be reported as
            # undefined instead of crashing the validation.
            return False

    def _stringify_options(self) -> str:
        """Return the sorted options, each in single quotes, separated by commas."""
        return ", ".join(f"'{option}'" for option in sorted(self.options))
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _SingleSelectAttribute(_Attribute):
    """Attribute definition whose value is one string out of predefined options."""

    # All values the attribute may take.
    options: set[str]

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the definition is a dict with ``type == "single-select"``."""
        return type(data_dict) is dict and data_dict.get("type") == "single-select"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute from the ``"options"`` entry of the definition."""
        return _SingleSelectAttribute(options=set(data_dict["options"]))

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is no string or no defined option."""
        if type(attribute_value) is not str:
            found_type = attribute_value.__class__.__name__
            return [
                f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
                f"'{found_type}' (should be 'str')."
            ]

        if attribute_value in self.options:
            return []

        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} has an undefined "
            f"value '{attribute_value}' (defined options: {self._stringify_options()})."
        ]

    def _stringify_options(self) -> str:
        """Return the sorted options, each in single quotes, separated by commas."""
        quoted_options = [f"'{option}'" for option in sorted(self.options)]
        return ", ".join(quoted_options)
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _StringAttribute(_Attribute):
    """Attribute definition whose values have to be of type ``str``."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the definition is the plain string ``"string"``.

        Despite the parameter name, no dictionary is expected here.
        """
        return data_dict == "string"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute; the definition carries no further settings."""
        return _StringAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not exactly a ``str``."""
        if type(attribute_value) is str:
            return []

        found_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{found_type}' (should be 'str')."
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright DB Netz AG and contributors
# SPDX-License-Identifier: Apache-2.0

import typing as t
from dataclasses import dataclass

from ._attribute_abc import _Attribute


@dataclass
class _VectorAttribute(_Attribute):
    """Attribute definition whose values have to be of type ``list``."""

    @classmethod
    def supports(cls, data_dict: dict):
        """Return True if the definition is the plain string ``"vector"``.

        Despite the parameter name, no dictionary is expected here.
        """
        return data_dict == "vector"

    @classmethod
    def fromdict(cls, data_dict: dict):
        """Create the attribute; the definition carries no further settings."""
        return _VectorAttribute()

    def check(self, attribute_name: str, attribute_value, annotation_id: str) -> t.List[str]:
        """Return an error message if the value is not exactly a ``list``.

        The entries of the list are not inspected.
        """
        if type(attribute_value) is list:
            return []

        found_type = attribute_value.__class__.__name__
        return [
            f"Attribute '{attribute_name}' of annotation {annotation_id} is of type "
            f"'{found_type}' (should be 'list')."
        ]
Loading

0 comments on commit 74527c1

Please sign in to comment.