diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2dc7a63f..73871d42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,12 +9,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
+- add pattern constants for vocabs, emails, urls and ids to types module
+- add regex pattern to json schema of identifier fields
+- automatically add examples and useScheme to json schema of enum fields
+
### Changes
+- BREAKING: use `identifier` instead of `stableTargetId` to get merged item from backend
+- ensure identifier unions are typed to generic `Identifier` instead of the first match
+ to signal that we don't actually know which of the union types is correct
+- unify pydantic schema configuration for all types
+- consistently parse emails, identifiers and temporals in models to their type, not str
+- consistently serialize emails, ids and temporals in models to str, not their type
+- make instances of Link type hashable, to harmonize them with Text models
+
### Deprecated
### Removed
+- drop manual examples from enum fields, because they are autogenerated now
+- BREAKING: remove `MEX_ID_PATTERN` from types, in favor of `IDENTIFIER_PATTERN`
+- BREAKING: make public `MEX_ID_ALPHABET` constant from identifier module private
+- BREAKING: remove `__str__` methods from Text and Link classes
+- BREAKING: drop support for parsing UUIDs as Identifiers, this was unused
+- BREAKING: drop support for parsing Links from markdown syntax, this was unused
+- BREAKING: remove pydantic1-style `validate` methods from all type models
+- BREAKING: remove `BackendApiConnector.post_models` in favor of `post_extracted_items`
+
### Fixed
### Security
@@ -22,8 +43,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [0.37.0] - 2024-10-01
### Added
-- added methods for extracting persons by name or ID from ldap
+- added methods for extracting persons by name or ID from ldap
- `contains_only_types` to check if fields are annotated as desired
- `group_fields_by_class_name` utility to simplify filtered model/field lookups
- new parameters to `get_inner_types` to customize what to unpack
diff --git a/mex/common/backend_api/connector.py b/mex/common/backend_api/connector.py
index 4db67cfe..94731488 100644
--- a/mex/common/backend_api/connector.py
+++ b/mex/common/backend_api/connector.py
@@ -1,4 +1,3 @@
-from typing import cast
from urllib.parse import urljoin
from requests.exceptions import HTTPError
@@ -19,7 +18,6 @@
AnyRuleSetResponse,
)
from mex.common.settings import BaseSettings
-from mex.common.types import AnyExtractedIdentifier
class BackendApiConnector(HTTPConnector):
@@ -41,27 +39,6 @@ def _set_url(self) -> None:
settings = BaseSettings.get()
self.url = urljoin(str(settings.backend_api_url), self.API_VERSION)
- def post_models(
- self,
- extracted_items: list[AnyExtractedModel],
- ) -> list[AnyExtractedIdentifier]:
- """Post extracted models to the backend in bulk.
-
- Args:
- extracted_items: Extracted models to post
-
- Raises:
- HTTPError: If post was not accepted, crashes or times out
-
- Returns:
- Identifiers of posted extracted models
- """
- # XXX deprecated method, please use `post_extracted_models` instead
- return cast(
- list[AnyExtractedIdentifier],
- self.post_extracted_items(extracted_items).identifiers,
- )
-
def post_extracted_items(
self,
extracted_items: list[AnyExtractedModel],
@@ -141,7 +118,6 @@ def fetch_merged_items(
Returns:
One page of merged items and the total count that was matched
"""
- # XXX this endpoint will only return faux merged items for now (MX-1382)
response = self.request(
method="GET",
endpoint="merged-item",
@@ -156,12 +132,12 @@ def fetch_merged_items(
def get_merged_item(
self,
- stable_target_id: str,
+ identifier: str,
) -> AnyMergedModel:
- """Return one merged item for the given `stableTargetId`.
+ """Return one merged item for the given `identifier`.
Args:
- stable_target_id: The merged item's identifier
+ identifier: The merged item's identifier
Raises:
MExError: If no merged item was found
@@ -174,7 +150,7 @@ def get_merged_item(
method="GET",
endpoint="merged-item",
params={
- "stableTargetId": stable_target_id,
+ "identifier": identifier,
"limit": "1",
},
)
@@ -201,7 +177,6 @@ def preview_merged_item(
Returns:
A single merged item
"""
- # XXX experimental method until the backend has a preview endpoint (MX-1406)
response = self.request(
method="GET",
endpoint=f"preview-item/{stable_target_id}",
@@ -224,7 +199,6 @@ def get_rule_set(
Returns:
A set of three rules
"""
- # XXX experimental method until the backend has a rule-set endpoint (MX-1416)
response = self.request(
method="GET",
endpoint=f"rule-set/{stable_target_id}",
diff --git a/mex/common/models/access_platform.py b/mex/common/models/access_platform.py
index e068952c..38b0daf8 100644
--- a/mex/common/models/access_platform.py
+++ b/mex/common/models/access_platform.py
@@ -2,7 +2,7 @@
from typing import Annotated, ClassVar, Literal
-from pydantic import Field, computed_field
+from pydantic import AfterValidator, Field, computed_field
from mex.common.models.base.extracted_data import ExtractedData
from mex.common.models.base.merged_item import MergedItem
@@ -16,6 +16,7 @@
from mex.common.types import (
APIType,
ExtractedAccessPlatformIdentifier,
+ Identifier,
Link,
MergedAccessPlatformIdentifier,
MergedContactPointIdentifier,
@@ -36,9 +37,12 @@ class _Stem(BaseModel):
class _OptionalLists(_Stem):
alternativeTitle: list[Text] = []
contact: list[
- MergedOrganizationalUnitIdentifier
- | MergedPersonIdentifier
- | MergedContactPointIdentifier
+ Annotated[
+ MergedOrganizationalUnitIdentifier
+ | MergedPersonIdentifier
+ | MergedContactPointIdentifier,
+ AfterValidator(Identifier),
+ ]
] = []
description: list[Text] = []
landingPage: list[Link] = []
@@ -48,39 +52,23 @@ class _OptionalLists(_Stem):
class _OptionalValues(_Stem):
endpointDescription: Link | None = None
- endpointType: (
- Annotated[APIType, Field(examples=["https://mex.rki.de/item/api-type-1"])]
- | None
- ) = None
+ endpointType: APIType | None = None
endpointURL: Link | None = None
class _RequiredValues(_Stem):
- technicalAccessibility: Annotated[
- TechnicalAccessibility,
- Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]),
- ]
+ technicalAccessibility: TechnicalAccessibility
class _SparseValues(_Stem):
- technicalAccessibility: Annotated[
- TechnicalAccessibility | None,
- Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]),
- ] = None
+ technicalAccessibility: TechnicalAccessibility | None = None
class _VariadicValues(_Stem):
endpointDescription: list[Link]
- endpointType: list[
- Annotated[APIType, Field(examples=["https://mex.rki.de/item/api-type-1"])]
- ] = []
+ endpointType: list[APIType] = []
endpointURL: list[Link] = []
- technicalAccessibility: list[
- Annotated[
- TechnicalAccessibility,
- Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]),
- ]
- ] = []
+ technicalAccessibility: list[TechnicalAccessibility] = []
class BaseAccessPlatform(_OptionalLists, _OptionalValues, _RequiredValues):
diff --git a/mex/common/models/activity.py b/mex/common/models/activity.py
index 8ed2de24..03e1bd25 100644
--- a/mex/common/models/activity.py
+++ b/mex/common/models/activity.py
@@ -5,7 +5,7 @@
from typing import Annotated, ClassVar, Literal
-from pydantic import Field, computed_field
+from pydantic import AfterValidator, Field, computed_field
from mex.common.models.base.extracted_data import ExtractedData
from mex.common.models.base.merged_item import MergedItem
@@ -19,6 +19,7 @@
from mex.common.types import (
ActivityType,
ExtractedActivityIdentifier,
+ Identifier,
Link,
MergedActivityIdentifier,
MergedContactPointIdentifier,
@@ -39,15 +40,16 @@ class _Stem(BaseModel):
class _OptionalLists(_Stem):
abstract: list[Text] = []
- activityType: list[
- Annotated[
- ActivityType, Field(examples=["https://mex.rki.de/item/activity-type-1"])
- ]
- ] = []
+ activityType: list[ActivityType] = []
alternativeTitle: list[Text] = []
documentation: list[Link] = []
end: list[YearMonthDay | YearMonth] = []
- externalAssociate: list[MergedOrganizationIdentifier | MergedPersonIdentifier] = []
+ externalAssociate: list[
+ Annotated[
+ MergedOrganizationIdentifier | MergedPersonIdentifier,
+ AfterValidator(Identifier),
+ ]
+ ] = []
funderOrCommissioner: list[MergedOrganizationIdentifier] = []
fundingProgram: list[str] = []
involvedPerson: list[MergedPersonIdentifier] = []
@@ -57,18 +59,19 @@ class _OptionalLists(_Stem):
shortName: list[Text] = []
start: list[YearMonthDay | YearMonth] = []
succeeds: list[MergedActivityIdentifier] = []
- theme: list[
- Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])]
- ] = []
+ theme: list[Theme] = []
website: list[Link] = []
class _RequiredLists(_Stem):
contact: Annotated[
list[
- MergedOrganizationalUnitIdentifier
- | MergedPersonIdentifier
- | MergedContactPointIdentifier,
+ Annotated[
+ MergedOrganizationalUnitIdentifier
+ | MergedPersonIdentifier
+ | MergedContactPointIdentifier,
+ AfterValidator(Identifier),
+ ]
],
Field(min_length=1),
]
@@ -80,9 +83,12 @@ class _RequiredLists(_Stem):
class _SparseLists(_Stem):
contact: list[
- MergedOrganizationalUnitIdentifier
- | MergedPersonIdentifier
- | MergedContactPointIdentifier,
+ Annotated[
+ MergedOrganizationalUnitIdentifier
+ | MergedPersonIdentifier
+ | MergedContactPointIdentifier,
+ AfterValidator(Identifier),
+ ]
] = []
responsibleUnit: list[MergedOrganizationalUnitIdentifier] = []
title: list[Text] = []
diff --git a/mex/common/models/consent.py b/mex/common/models/consent.py
new file mode 100644
index 00000000..3e62ee0b
--- /dev/null
+++ b/mex/common/models/consent.py
@@ -0,0 +1,3 @@
+# XXX this is a forward-compatibility hint for feature/model-update-v3:
+# when this gets merged with model v3, remove the
+# `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields
diff --git a/mex/common/models/distribution.py b/mex/common/models/distribution.py
index 295374e8..16bac711 100644
--- a/mex/common/models/distribution.py
+++ b/mex/common/models/distribution.py
@@ -59,26 +59,13 @@ class _OptionalValues(_Stem):
accessService: MergedAccessPlatformIdentifier | None = None
accessURL: Link | None = None
downloadURL: Link | None = None
- license: (
- Annotated[License, Field(examples=["https://mex.rki.de/item/license-1"])] | None
- ) = None
- mediaType: (
- Annotated[
- MIMEType,
- Field(
- examples=["https://mex.rki.de/item/mime-type-1"],
- ),
- ]
- | None
- ) = None
+ license: License | None = None
+ mediaType: MIMEType | None = None
modified: YearMonthDayTime | YearMonthDay | YearMonth | None = None
class _RequiredValues(_Stem):
- accessRestriction: Annotated[
- AccessRestriction,
- Field(examples=["https://mex.rki.de/item/access-restriction-1"]),
- ]
+ accessRestriction: AccessRestriction
issued: YearMonthDayTime | YearMonthDay | YearMonth
title: Annotated[
str,
@@ -90,13 +77,7 @@ class _RequiredValues(_Stem):
class _SparseValues(_Stem):
- accessRestriction: (
- Annotated[
- AccessRestriction,
- Field(examples=["https://mex.rki.de/item/access-restriction-1"]),
- ]
- | None
- ) = None
+ accessRestriction: AccessRestriction | None = None
issued: YearMonthDayTime | YearMonthDay | YearMonth | None = None
title: (
Annotated[
@@ -111,12 +92,7 @@ class _SparseValues(_Stem):
class _VariadicValues(_Stem):
- accessRestriction: list[
- Annotated[
- AccessRestriction,
- Field(examples=["https://mex.rki.de/item/access-restriction-1"]),
- ]
- ] = []
+ accessRestriction: list[AccessRestriction] = []
issued: list[YearMonthDayTime | YearMonthDay | YearMonth] = []
title: list[
Annotated[
diff --git a/mex/common/models/primary_source.py b/mex/common/models/primary_source.py
index f96f307c..0cee5692 100644
--- a/mex/common/models/primary_source.py
+++ b/mex/common/models/primary_source.py
@@ -2,7 +2,7 @@
from typing import Annotated, ClassVar, Literal
-from pydantic import Field, computed_field
+from pydantic import AfterValidator, Field, computed_field
from mex.common.models.base.extracted_data import ExtractedData
from mex.common.models.base.merged_item import MergedItem
@@ -15,6 +15,7 @@
)
from mex.common.types import (
ExtractedPrimarySourceIdentifier,
+ Identifier,
Link,
MergedContactPointIdentifier,
MergedOrganizationalUnitIdentifier,
@@ -33,9 +34,12 @@ class _Stem(BaseModel):
class _OptionalLists(_Stem):
alternativeTitle: list[Text] = []
contact: list[
- MergedOrganizationalUnitIdentifier
- | MergedPersonIdentifier
- | MergedContactPointIdentifier
+ Annotated[
+ MergedOrganizationalUnitIdentifier
+ | MergedPersonIdentifier
+ | MergedContactPointIdentifier,
+ AfterValidator(Identifier),
+ ]
] = []
description: list[Text] = []
documentation: list[Link] = []
diff --git a/mex/common/models/resource.py b/mex/common/models/resource.py
index 43df13a6..d7826368 100644
--- a/mex/common/models/resource.py
+++ b/mex/common/models/resource.py
@@ -2,7 +2,7 @@
from typing import Annotated, ClassVar, Literal
-from pydantic import Field, computed_field
+from pydantic import AfterValidator, Field, computed_field
from mex.common.models.base.extracted_data import ExtractedData
from mex.common.models.base.merged_item import MergedItem
@@ -19,6 +19,7 @@
DataProcessingState,
ExtractedResourceIdentifier,
Frequency,
+ Identifier,
Language,
License,
Link,
@@ -47,14 +48,7 @@ class _Stem(BaseModel):
class _OptionalLists(_Stem):
accessPlatform: list[MergedAccessPlatformIdentifier] = []
alternativeTitle: list[Text] = []
- anonymizationPseudonymization: list[
- Annotated[
- AnonymizationPseudonymization,
- Field(
- examples=["https://mex.rki.de/item/anonymization-pseudonymization-1"]
- ),
- ]
- ] = []
+ anonymizationPseudonymization: list[AnonymizationPseudonymization] = []
contributingUnit: list[MergedOrganizationalUnitIdentifier] = []
contributor: list[MergedPersonIdentifier] = []
creator: list[MergedPersonIdentifier] = []
@@ -66,9 +60,7 @@ class _OptionalLists(_Stem):
instrumentToolOrApparatus: list[Text] = []
isPartOf: list[MergedResourceIdentifier] = []
keyword: list[Text] = []
- language: list[
- Annotated[Language, Field(examples=["https://mex.rki.de/item/language-1"])]
- ] = []
+ language: list[Language] = []
loincId: list[str] = []
meshId: list[
Annotated[
@@ -85,40 +77,26 @@ class _OptionalLists(_Stem):
publication: list[Link] = []
publisher: list[MergedOrganizationIdentifier] = []
qualityInformation: list[Text] = []
- resourceTypeGeneral: list[
- Annotated[
- ResourceTypeGeneral,
- Field(
- examples=["https://mex.rki.de/item/resource-type-general-1"],
- ),
- ]
- ] = []
+ resourceTypeGeneral: list[ResourceTypeGeneral] = []
resourceTypeSpecific: list[Text] = []
rights: list[Text] = []
spatial: list[Text] = []
- stateOfDataProcessing: list[
- Annotated[
- DataProcessingState,
- Field(
- examples=["https://mex.rki.de/item/data-processing-state-1"],
- ),
- ]
- ] = []
+ stateOfDataProcessing: list[DataProcessingState] = []
class _RequiredLists(_Stem):
contact: Annotated[
list[
- MergedOrganizationalUnitIdentifier
- | MergedPersonIdentifier
- | MergedContactPointIdentifier
+ Annotated[
+ MergedOrganizationalUnitIdentifier
+ | MergedPersonIdentifier
+ | MergedContactPointIdentifier,
+ AfterValidator(Identifier),
+ ]
],
Field(min_length=1),
]
- theme: Annotated[
- list[Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])]],
- Field(min_length=1),
- ]
+ theme: Annotated[list[Theme], Field(min_length=1)]
title: Annotated[list[Text], Field(min_length=1)]
unitInCharge: Annotated[
list[MergedOrganizationalUnitIdentifier], Field(min_length=1)
@@ -127,26 +105,22 @@ class _RequiredLists(_Stem):
class _SparseLists(_Stem):
contact: list[
- MergedOrganizationalUnitIdentifier
- | MergedPersonIdentifier
- | MergedContactPointIdentifier
- ] = []
- theme: list[
- Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])]
+ Annotated[
+ MergedOrganizationalUnitIdentifier
+ | MergedPersonIdentifier
+ | MergedContactPointIdentifier,
+ AfterValidator(Identifier),
+ ]
] = []
+ theme: list[Theme] = []
title: list[Text] = []
unitInCharge: list[MergedOrganizationalUnitIdentifier] = []
class _OptionalValues(_Stem):
- accrualPeriodicity: (
- Annotated[Frequency, Field(examples=["https://mex.rki.de/item/frequency-1"])]
- | None
- ) = None
+ accrualPeriodicity: Frequency | None = None
created: YearMonthDayTime | YearMonthDay | YearMonth | None = None
- license: (
- Annotated[License, Field(examples=["https://mex.rki.de/item/license-1"])] | None
- ) = None
+ license: License | None = None
modified: YearMonthDayTime | YearMonthDay | YearMonth | None = None
sizeOfDataBasis: str | None = None
temporal: (
@@ -170,42 +144,18 @@ class _OptionalValues(_Stem):
class _RequiredValues(_Stem):
- accessRestriction: Annotated[
- AccessRestriction,
- Field(
- examples=["https://mex.rki.de/item/access-restriction-1"],
- ),
- ]
+ accessRestriction: AccessRestriction
class _SparseValues(_Stem):
- accessRestriction: (
- Annotated[
- AccessRestriction,
- Field(
- examples=["https://mex.rki.de/item/access-restriction-1"],
- ),
- ]
- | None
- ) = None
+ accessRestriction: AccessRestriction | None = None
class _VariadicValues(_Stem):
- accessRestriction: list[
- Annotated[
- AccessRestriction,
- Field(
- examples=["https://mex.rki.de/item/access-restriction-1"],
- ),
- ]
- ] = []
- accrualPeriodicity: list[
- Annotated[Frequency, Field(examples=["https://mex.rki.de/item/frequency-1"])]
- ] = []
+ accessRestriction: list[AccessRestriction] = []
+ accrualPeriodicity: list[Frequency] = []
created: list[YearMonthDayTime | YearMonthDay | YearMonth] = []
- license: list[
- Annotated[License, Field(examples=["https://mex.rki.de/item/license-1"])]
- ] = []
+ license: list[License] = []
modified: list[YearMonthDayTime | YearMonthDay | YearMonth] = []
sizeOfDataBasis: list[str] = []
temporal: list[
diff --git a/mex/common/models/variable.py b/mex/common/models/variable.py
index 9e511edb..bd35ffb6 100644
--- a/mex/common/models/variable.py
+++ b/mex/common/models/variable.py
@@ -86,15 +86,7 @@ class _OptionalValues(_Stem):
]
| None
) = None
- dataType: (
- Annotated[
- DataType,
- Field(
- examples=["https://mex.rki.de/item/data-type-1"],
- ),
- ]
- | None
- ) = None
+ dataType: DataType | None = None
class _VariadicValues(_Stem):
@@ -106,14 +98,7 @@ class _VariadicValues(_Stem):
),
]
] = []
- dataType: list[
- Annotated[
- DataType,
- Field(
- examples=["https://mex.rki.de/item/data-type-1"],
- ),
- ]
- ] = []
+ dataType: list[DataType] = []
class BaseVariable(_OptionalLists, _RequiredLists, _OptionalValues):
diff --git a/mex/common/types/__init__.py b/mex/common/types/__init__.py
index 8df713b9..6465a4e9 100644
--- a/mex/common/types/__init__.py
+++ b/mex/common/types/__init__.py
@@ -1,8 +1,8 @@
from typing import Final, Literal, get_args
-from mex.common.types.email import Email
+from mex.common.types.email import EMAIL_PATTERN, Email
from mex.common.types.identifier import (
- MEX_ID_PATTERN,
+ IDENTIFIER_PATTERN,
ExtractedAccessPlatformIdentifier,
ExtractedActivityIdentifier,
ExtractedContactPointIdentifier,
@@ -46,6 +46,7 @@
)
from mex.common.types.text import Text, TextLanguage
from mex.common.types.vocabulary import (
+ VOCABULARY_PATTERN,
AccessRestriction,
ActivityType,
AnonymizationPseudonymization,
@@ -77,6 +78,7 @@
"CET",
"DataProcessingState",
"DataType",
+ "EMAIL_PATTERN",
"Email",
"EXTRACTED_IDENTIFIER_CLASSES_BY_NAME",
"EXTRACTED_IDENTIFIER_CLASSES",
@@ -93,6 +95,7 @@
"ExtractedVariableGroupIdentifier",
"ExtractedVariableIdentifier",
"Frequency",
+ "IDENTIFIER_PATTERN",
"Identifier",
"IdentityProvider",
"Language",
@@ -114,7 +117,6 @@
"MergedResourceIdentifier",
"MergedVariableGroupIdentifier",
"MergedVariableIdentifier",
- "MEX_ID_PATTERN",
"MIMEType",
"NESTED_MODEL_CLASSES_BY_NAME",
"NESTED_MODEL_CLASSES",
@@ -130,7 +132,9 @@
"Text",
"TextLanguage",
"Theme",
+ "URL_PATTERN",
"UTC",
+ "VOCABULARY_PATTERN",
"VocabularyEnum",
"VocabularyLoader",
"WorkPath",
diff --git a/mex/common/types/email.py b/mex/common/types/email.py
index 89942581..3a98df1b 100644
--- a/mex/common/types/email.py
+++ b/mex/common/types/email.py
@@ -14,7 +14,13 @@ def __get_pydantic_core_schema__(
cls, source_type: Any, handler: GetCoreSchemaHandler
) -> core_schema.CoreSchema:
"""Modify the core schema to add the email regex."""
- return core_schema.str_schema(pattern=EMAIL_PATTERN)
+ return core_schema.chain_schema(
+ [
+ core_schema.str_schema(pattern=EMAIL_PATTERN),
+ core_schema.no_info_plain_validator_function(cls),
+ ],
+ serialization=core_schema.to_string_ser_schema(when_used="unless-none"),
+ )
@classmethod
def __get_pydantic_json_schema__(
@@ -26,3 +32,7 @@ def __get_pydantic_json_schema__(
json_schema_["format"] = "email"
json_schema_["examples"] = ["info@rki.de"]
return json_schema_
+
+ def __repr__(self) -> str:
+ """Overwrite the default representation."""
+ return f'{self.__class__.__name__}("{self}")'
diff --git a/mex/common/types/identifier.py b/mex/common/types/identifier.py
index 2b6eb9b2..c2537dee 100644
--- a/mex/common/types/identifier.py
+++ b/mex/common/types/identifier.py
@@ -1,4 +1,3 @@
-import re
import string
from typing import Any, Self
from uuid import UUID, uuid4
@@ -6,9 +5,8 @@
from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler, json_schema
from pydantic_core import core_schema
-MEX_ID_ALPHABET = string.ascii_letters + string.digits
-MEX_ID_PATTERN = r"^[a-zA-Z0-9]{14,22}$"
-UUID_PATTERN = r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
+_ALPHABET = string.ascii_letters + string.digits
+IDENTIFIER_PATTERN = r"^[a-zA-Z0-9]{14,22}$"
class Identifier(str):
@@ -19,35 +17,27 @@ def generate(cls, seed: int | None = None) -> Self:
"""Generate a new identifier from a seed or random UUID version 4."""
# Inspired by https://pypi.org/project/shortuuid
output = ""
- alpha_len = len(MEX_ID_ALPHABET)
+ alpha_len = len(_ALPHABET)
if seed is None:
number = uuid4().int
else:
number = UUID(int=seed, version=4).int
while number:
number, digit = divmod(number, alpha_len)
- output += MEX_ID_ALPHABET[digit]
+ output += _ALPHABET[digit]
return cls(output[::-1])
- @classmethod
- def validate(cls, value: Any) -> Self:
- """Validate a string, UUID or Identifier."""
- if isinstance(value, str | UUID | Identifier):
- value = str(value)
- if re.match(MEX_ID_PATTERN, value):
- return cls(value)
- if re.match(UUID_PATTERN, value):
- return cls.generate(seed=UUID(value).int)
- raise ValueError(f"Invalid identifier format: {value}")
- raise ValueError(f"Cannot parse {type(value)} as {cls.__name__}")
-
@classmethod
def __get_pydantic_core_schema__(
cls, source_type: Any, handler: GetCoreSchemaHandler
) -> core_schema.CoreSchema:
"""Modify the core schema to add the ID regex."""
- return core_schema.no_info_before_validator_function(
- cls.validate, core_schema.str_schema(pattern=MEX_ID_PATTERN)
+ return core_schema.chain_schema(
+ [
+ core_schema.str_schema(pattern=IDENTIFIER_PATTERN),
+ core_schema.no_info_plain_validator_function(cls),
+ ],
+ serialization=core_schema.to_string_ser_schema(when_used="unless-none"),
)
@classmethod
@@ -58,11 +48,12 @@ def __get_pydantic_json_schema__(
json_schema_ = handler(core_schema_)
json_schema_ = handler.resolve_ref_schema(json_schema_)
json_schema_["title"] = cls.__name__
+ json_schema_["pattern"] = IDENTIFIER_PATTERN
return json_schema_
def __repr__(self) -> str:
"""Overwrite the default representation."""
- return f"{self.__class__.__name__}({super().__str__().__repr__()})"
+ return f'{self.__class__.__name__}("{self}")'
# We have technically-identical subclasses of identifier types (one per entity-type).
diff --git a/mex/common/types/link.py b/mex/common/types/link.py
index a29ff7cf..7ed49d54 100644
--- a/mex/common/types/link.py
+++ b/mex/common/types/link.py
@@ -1,25 +1,9 @@
-import re
from enum import StrEnum
from typing import Annotated, Any
from pydantic import BaseModel, Field, model_validator
-# https://daringfireball.net/projects/markdown/syntax#backslash
-MARKDOWN_SPECIAL_CHARS = r"\`*_{}[]()#+-.!"
-
-
-def markdown_escape(string: str) -> str:
- """Escape all special characters for markdown usage."""
- for char in MARKDOWN_SPECIAL_CHARS:
- string = string.replace(char, f"\\{char}")
- return string
-
-
-def markdown_unescape(string: str) -> str:
- """Unescape all special characters from a markdown string."""
- for char in MARKDOWN_SPECIAL_CHARS:
- string = string.replace(f"\\{char}", char)
- return string
+URL_PATTERN = r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?"
class LinkLanguage(StrEnum):
@@ -32,10 +16,10 @@ class LinkLanguage(StrEnum):
class Link(BaseModel):
"""Type class for Link objects.
- Links can be parsed from nested JSON objects or from markdown strings.
+ Links can be parsed from nested JSON objects or from raw strings.
Example:
- Link(url="https://foo", title="Title") == Link.model_validate("[Title](https://foo)")
+ Link(url="http://foo.bar") == Link.model_validate("http://foo.bar")
"""
language: LinkLanguage | None = None
@@ -43,7 +27,7 @@ class Link(BaseModel):
url: Annotated[
str,
Field(
- pattern=r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?",
+ pattern=URL_PATTERN,
min_length=1,
examples=["https://hello-world.org", "file://S:/OE/MF4/Projekte/MEx"],
json_schema_extra={"format": "uri"},
@@ -52,23 +36,14 @@ class Link(BaseModel):
@model_validator(mode="before")
@classmethod
- def convert_markdown_to_link(cls, values: Any) -> dict[str, Any]:
+ def validate_strings(cls, value: Any) -> dict[str, Any]:
"""Convert string input to dictionary."""
- if isinstance(values, dict):
- return values
- if isinstance(values, str):
- if match := re.match(r"\[(?P<title>.*)\]\((?P<url>.*)\)", values):
- return {
- key: markdown_unescape(value)
- for key, value in match.groupdict().items()
- }
- return {"url": values}
- raise ValueError(f"Allowed input types are dict and str, got {type(values)}")
-
- def __str__(self) -> str:
- """Render the link as markdown if a title is set, otherwise as plain url."""
- if title := self.title:
- title = markdown_escape(title)
- url = markdown_escape(self.url)
- return f"[{title}]({url})"
- return self.url
+ if isinstance(value, str):
+ return {"url": value}
+ if isinstance(value, dict):
+ return value
+ raise ValueError(f"Allowed input types are dict and str, got {type(value)}")
+
+ def __hash__(self) -> int:
+ """Return the hash of this link."""
+ return hash((self.url, self.title, self.language))
diff --git a/mex/common/types/path.py b/mex/common/types/path.py
index 7cc78925..6a1539d1 100644
--- a/mex/common/types/path.py
+++ b/mex/common/types/path.py
@@ -1,6 +1,6 @@
from os import PathLike
from pathlib import Path
-from typing import Any, Self, Union
+from typing import Any, Union
from pydantic import GetCoreSchemaHandler
from pydantic_core import core_schema
@@ -19,6 +19,19 @@ def __init__(self, path: Union[str, Path, "PathWrapper"]) -> None:
path = path._path
self._path = path
+ @classmethod
+ def __get_pydantic_core_schema__(
+ cls, source_type: Any, handler: GetCoreSchemaHandler
+ ) -> core_schema.CoreSchema:
+ """Modify the core schema to add validation and serialization rules."""
+ return core_schema.chain_schema(
+ [
+ core_schema.is_instance_schema(str | Path | PathWrapper),
+ core_schema.no_info_plain_validator_function(cls),
+ ],
+ serialization=core_schema.to_string_ser_schema(when_used="unless-none"),
+ )
+
def __fspath__(self) -> str:
"""Return the file system path representation."""
return self._path.__fspath__()
@@ -49,37 +62,6 @@ def is_relative(self) -> bool:
"""True if the underlying path is relative."""
return not self._path.is_absolute()
- @classmethod
- def __get_pydantic_core_schema__(
- cls, source_type: Any, handler: GetCoreSchemaHandler
- ) -> core_schema.CoreSchema:
- """Set schema to str schema."""
- from_str_schema = core_schema.chain_schema(
- [
- core_schema.str_schema(),
- core_schema.no_info_plain_validator_function(
- cls.validate,
- ),
- ]
- )
- from_anything_schema = core_schema.chain_schema(
- [
- core_schema.no_info_plain_validator_function(cls.validate),
- core_schema.is_instance_schema(PathWrapper),
- ]
- )
- return core_schema.json_or_python_schema(
- json_schema=from_str_schema,
- python_schema=from_anything_schema,
- )
-
- @classmethod
- def validate(cls, value: Any) -> Self:
- """Convert a string value to a Text instance."""
- if isinstance(value, str | Path | PathWrapper):
- return cls(value)
- raise ValueError(f"Cannot parse {type(value)} as {cls.__name__}")
-
class AssetsPath(PathWrapper):
"""Custom path for settings that can be absolute or relative to `assets_dir`."""
diff --git a/mex/common/types/temporal_entity.py b/mex/common/types/temporal_entity.py
index 48f8c8df..3f591db0 100644
--- a/mex/common/types/temporal_entity.py
+++ b/mex/common/types/temporal_entity.py
@@ -193,27 +193,20 @@ def __get_pydantic_core_schema__(
cls, source_type: Any, handler: GetCoreSchemaHandler
) -> core_schema.CoreSchema:
"""Modify the core schema to add validation and serialization rules."""
- from_str_schema = core_schema.chain_schema(
- [
- core_schema.str_schema(pattern=cls.STR_SCHEMA_PATTERN),
- core_schema.no_info_plain_validator_function(
- cls.validate,
- ),
- ]
- )
- from_anything_schema = core_schema.chain_schema(
- [
- core_schema.no_info_plain_validator_function(cls.validate),
- core_schema.is_instance_schema(cls),
- ]
- )
- serialization_schema = core_schema.plain_serializer_function_ser_schema(
- lambda instance: str(instance)
- )
return core_schema.json_or_python_schema(
- json_schema=from_str_schema,
- python_schema=from_anything_schema,
- serialization=serialization_schema,
+ json_schema=core_schema.chain_schema(
+ [
+ core_schema.str_schema(pattern=cls.STR_SCHEMA_PATTERN),
+ core_schema.no_info_plain_validator_function(cls),
+ ]
+ ),
+ python_schema=core_schema.chain_schema(
+ [
+ core_schema.is_instance_schema(cls | date | str | TemporalEntity),
+ core_schema.no_info_plain_validator_function(cls),
+ ]
+ ),
+ serialization=core_schema.to_string_ser_schema(when_used="unless-none"),
)
@classmethod
@@ -221,17 +214,10 @@ def __get_pydantic_json_schema__(
cls, core_schema_: core_schema.CoreSchema, handler: GetJsonSchemaHandler
) -> json_schema.JsonSchemaValue:
"""Modify the json schema to add a title, examples and an optional format."""
- json_schema = handler(core_schema_)
- json_schema["title"] = cls.__name__
- json_schema.update(cls.JSON_SCHEMA_CONFIG)
- return json_schema
-
- @classmethod
- def validate(cls, value: Any) -> "TemporalEntity":
- """Parse any value and try to convert it into a temporal entity."""
- if isinstance(value, cls | date | str | TemporalEntity):
- return cls(value)
- raise TypeError(f"Cannot parse {type(value)} as {cls.__name__}")
+ json_schema_ = handler(core_schema_)
+ json_schema_["title"] = cls.__name__
+ json_schema_.update(cls.JSON_SCHEMA_CONFIG)
+ return json_schema_
@staticmethod
def _parse_integers(
@@ -283,23 +269,24 @@ def _parse_date(
"""Parse a date and assume the precision is days."""
return datetime(value.year, value.month, value.day), TemporalEntityPrecision.DAY
- def __eq__(self, other: object) -> bool:
+ def __eq__(self, other: Any) -> bool:
"""Return whether the given other value is the same as this one."""
try:
- other = self.validate(other)
+ other_temporal = TemporalEntity(other)
except TypeError:
return False
return bool(
- self.date_time == other.date_time and self.precision == other.precision
+ self.date_time == other_temporal.date_time
+ and self.precision == other_temporal.precision
)
def __gt__(self, other: Any) -> bool:
"""Return whether the given other value is the greater than this one."""
try:
- other = self.validate(other)
+ other_temporal = TemporalEntity(other)
except TypeError:
raise NotImplementedError from None
- return bool(self.date_time > other.date_time)
+ return bool(self.date_time > other_temporal.date_time)
def __str__(self) -> str:
"""Render temporal entity with format fitting for its precision."""
@@ -308,7 +295,7 @@ def __str__(self) -> str:
)
def __repr__(self) -> str:
- """Render a presentation showing this is not just a datetime."""
+ """Overwrite the default representation."""
return f'{self.__class__.__name__}("{self}")'
diff --git a/mex/common/types/text.py b/mex/common/types/text.py
index 2f406fb8..14f55f8f 100644
--- a/mex/common/types/text.py
+++ b/mex/common/types/text.py
@@ -54,10 +54,6 @@ def validate_strings(cls, value: Any) -> dict[str, Any]:
return value
raise ValueError(f"Allowed input types are dict and str, got {type(value)}")
- def __str__(self) -> str:
- """Return the text value."""
- return self.value
-
def __hash__(self) -> int:
"""Return the hash of Text."""
return hash((self.value, self.language))
diff --git a/mex/common/types/vocabulary.py b/mex/common/types/vocabulary.py
index f17b06f5..1426dcad 100644
--- a/mex/common/types/vocabulary.py
+++ b/mex/common/types/vocabulary.py
@@ -5,7 +5,14 @@
from importlib.resources import files
from typing import TYPE_CHECKING, ClassVar, Self, Union
-from pydantic import AnyUrl, BaseModel
+from pydantic import (
+ AnyUrl,
+ BaseModel,
+ GetCoreSchemaHandler,
+ GetJsonSchemaHandler,
+ json_schema,
+)
+from pydantic_core import core_schema
from mex.common.utils import normalize
@@ -15,6 +22,7 @@
from mex.common.types import Text
MODEL_VOCABULARIES = files("mex.model.vocabularies")
+VOCABULARY_PATTERN = r"https://mex.rki.de/item/[a-z0-9-]+"
class BilingualText(BaseModel):
@@ -71,10 +79,6 @@ class VocabularyEnum(Enum, metaclass=VocabularyLoader):
__vocabulary__: ClassVar[str]
__concepts__: ClassVar[list[Concept]]
- def __repr__(self) -> str:
- """Overwrite representation because dynamic enum names are unknown to mypy."""
- return f'{self.__class__.__name__}["{self.name}"]'
-
@classmethod
def find(cls, search_term: Union[str, "Text"]) -> Self | None:
"""Get the enum instance that matches a label of the underlying concepts.
@@ -99,12 +103,50 @@ def find(cls, search_term: Union[str, "Text"]) -> Self | None:
continue
if language is None:
searchable_labels.extend([normalize(label.de), normalize(label.en)])
- elif language_label := label.dict().get(language.value):
+ elif language_label := label.model_dump().get(language.value):
searchable_labels.append(normalize(language_label))
if search_term in searchable_labels:
return cls(str(concept.identifier))
return None
+ @classmethod
+ def __get_pydantic_core_schema__(
+ cls, source_type: object, handler: GetCoreSchemaHandler
+ ) -> core_schema.CoreSchema:
+ """Modify the core schema to add the vocabulary regex."""
+ return core_schema.json_or_python_schema(
+ json_schema=core_schema.union_schema(
+ [
+ core_schema.str_schema(pattern=VOCABULARY_PATTERN),
+ core_schema.no_info_plain_validator_function(cls),
+ ],
+ ),
+ python_schema=core_schema.chain_schema(
+ [
+ core_schema.is_instance_schema(cls | str),
+ core_schema.no_info_plain_validator_function(cls),
+ ]
+ ),
+ serialization=core_schema.plain_serializer_function_ser_schema(
+ lambda s: s.value,
+ when_used="unless-none",
+ ),
+ )
+
+ @classmethod
+ def __get_pydantic_json_schema__(
+ cls, core_schema_: core_schema.CoreSchema, handler: GetJsonSchemaHandler
+ ) -> json_schema.JsonSchemaValue:
+ """Modify the json schema to add the scheme and an example."""
+ json_schema_ = handler(core_schema_)
+ json_schema_["examples"] = [f"https://mex.rki.de/item/{cls.__vocabulary__}-1"]
+ json_schema_["useScheme"] = f"https://mex.rki.de/item/{cls.__vocabulary__}"
+ return json_schema_
+
+ def __repr__(self) -> str:
+ """Overwrite representation because dynamic enum names are unknown to mypy."""
+ return f'{self.__class__.__name__}["{self.name}"]'
+
class AccessRestriction(VocabularyEnum):
"""The access restriction type."""
diff --git a/tests/backend_api/test_connector.py b/tests/backend_api/test_connector.py
index a2de406d..083c0f38 100644
--- a/tests/backend_api/test_connector.py
+++ b/tests/backend_api/test_connector.py
@@ -133,7 +133,7 @@ def test_get_merged_item_mocked(
"GET",
"http://localhost:8080/v0/merged-item",
{
- "stableTargetId": "NGwfzG8ROsrvIiQIVDVy",
+ "identifier": "NGwfzG8ROsrvIiQIVDVy",
"limit": "1",
},
headers={
@@ -156,7 +156,7 @@ def test_get_merged_item_error_mocked(mocked_backend: MagicMock) -> None:
"GET",
"http://localhost:8080/v0/merged-item",
{
- "stableTargetId": "NGwfzG8ROsrvIiQIVDVy",
+ "identifier": "NGwfzG8ROsrvIiQIVDVy",
"limit": "1",
},
headers={
diff --git a/tests/models/test_model_schemas.py b/tests/models/test_model_schemas.py
index 5e31b283..c828485c 100644
--- a/tests/models/test_model_schemas.py
+++ b/tests/models/test_model_schemas.py
@@ -10,7 +10,7 @@
from mex.common.models import EXTRACTED_MODEL_CLASSES, BaseModel
from mex.common.transform import dromedary_to_kebab
-from mex.common.types.identifier import MEX_ID_PATTERN
+from mex.common.types import IDENTIFIER_PATTERN, VOCABULARY_PATTERN
MEX_MODEL_ENTITIES = files("mex.model.entities")
@@ -137,13 +137,11 @@ def prepare_field(field: str, obj: list[Any] | dict[str, Any]) -> None:
# pop annotations that we don't compare directly but use for other comparisons
title = obj.pop("title", "") # only in model (autogenerated by pydantic)
- use_scheme = obj.pop("useScheme", "") # only in spec (needed to select vocabulary)
- vocabulary = use_scheme.removeprefix("https://mex.rki.de/item/") # vocabulary name
# align reference paths
# (the paths to referenced vocabularies and types differ between the models
# and the specification, so we need to make sure they match before comparing)
- if obj.get("pattern") == MEX_ID_PATTERN:
+ if obj.get("pattern") == IDENTIFIER_PATTERN:
obj.pop("pattern")
obj.pop("type")
if field in ("identifier", "stableTargetId"):
@@ -156,9 +154,10 @@ def prepare_field(field: str, obj: list[Any] | dict[str, Any]) -> None:
)
# align concept/enum annotations
- # (spec uses `useScheme` to specify vocabularies and models use enums)
if obj.get("$ref") == "/schema/entities/concept#/identifier":
- obj["$ref"] = f"/schema/fields/{vocabulary}"
+ obj["pattern"] = VOCABULARY_PATTERN
+ obj["type"] = "string"
+ obj.pop("$ref")
# make sure all refs have paths in kebab-case
# (the models use the class names, whereas the spec uses kebab-case URLs)
diff --git a/tests/test_settings.py b/tests/test_settings.py
index 64680fdc..2a4d375e 100644
--- a/tests/test_settings.py
+++ b/tests/test_settings.py
@@ -88,13 +88,8 @@ class DummySettings(BaseSettings):
sub_model=SubModel(sub_model_path=relative),
)
- settings_dict = settings.model_dump()
- assert settings_dict["non_path"] == "blablabla"
- assert settings_dict["abs_work_path"] == absolute
- assert settings_dict["rel_work_path"] == WorkPath(settings.work_dir / relative)
- assert settings_dict["assets_path"] == AssetsPath(
- absolute / "assets_dir" / relative
- )
- assert settings_dict["sub_model"]["sub_model_path"] == WorkPath(
- settings.work_dir / relative
- )
+ assert settings.non_path == "blablabla"
+ assert settings.abs_work_path == absolute
+ assert settings.rel_work_path == WorkPath(settings.work_dir / relative)
+ assert settings.assets_path == AssetsPath(absolute / "assets_dir" / relative)
+ assert settings.sub_model.sub_model_path == WorkPath(settings.work_dir / relative)
diff --git a/tests/types/test_data/dummy-vocabulary.json b/tests/types/test_data/dummy-vocabulary.json
index 4852a481..fd503d2d 100644
--- a/tests/types/test_data/dummy-vocabulary.json
+++ b/tests/types/test_data/dummy-vocabulary.json
@@ -8,8 +8,8 @@
"de": "desc-de-one",
"en": "desc-en-one"
},
- "identifier": "https://dummy/concept-one",
- "inScheme": "https://dummy/concept",
+ "identifier": "https://mex.rki.de/item/dummy-concept-1",
+ "inScheme": "https://mex.rki.de/item/dummy-concept",
"prefLabel": {
"de": "pref-de-one",
"en": "pref-en-one"
@@ -17,8 +17,8 @@
},
{
"definition": null,
- "identifier": "https://dummy/concept-two",
- "inScheme": "https://dummy/concept",
+ "identifier": "https://mex.rki.de/item/dummy-concept-2",
+ "inScheme": "https://mex.rki.de/item/dummy-concept",
"prefLabel": {
"de": "pref-de-two",
"en": "pref-en-two"
diff --git a/tests/types/test_email.py b/tests/types/test_email.py
index a18eb7a4..56485bbd 100644
--- a/tests/types/test_email.py
+++ b/tests/types/test_email.py
@@ -1,16 +1,53 @@
import pytest
from pydantic import BaseModel, ValidationError
-from mex.common.types import Email
+from mex.common.types import EMAIL_PATTERN, Email
class DummyModel(BaseModel):
email: Email
-def test_email() -> None:
+def test_email_validation() -> None:
model = DummyModel.model_validate({"email": "wasd@def.ghi"})
- assert model.email == "wasd@def.ghi"
+ assert model.email == Email("wasd@def.ghi")
+
+ model = DummyModel.model_validate({"email": Email("wasd@def.ghi")})
+ assert model.email == Email("wasd@def.ghi")
+
+ model = DummyModel(email=Email("wasd@def.ghi"))
+ assert model.email == Email("wasd@def.ghi")
with pytest.raises(ValidationError):
DummyModel.model_validate({"email": "foobar"})
+
+ with pytest.raises(ValidationError):
+ DummyModel.model_validate({"email": object()})
+
+
+def test_email_serialization() -> None:
+ model = DummyModel.model_validate({"email": "wasd@def.ghi"})
+ raw = model.model_dump()
+
+ assert raw == {"email": "wasd@def.ghi"}
+
+
+def test_email_schema() -> None:
+ assert DummyModel.model_json_schema() == {
+ "properties": {
+ "email": {
+ "examples": ["info@rki.de"],
+ "format": "email",
+ "pattern": EMAIL_PATTERN,
+ "title": "Email",
+ "type": "string",
+ }
+ },
+ "required": ["email"],
+ "title": "DummyModel",
+ "type": "object",
+ }
+
+
+def test_email_repr() -> None:
+ assert repr(Email("wasd@def.ghi")) == 'Email("wasd@def.ghi")'
diff --git a/tests/types/test_identifier.py b/tests/types/test_identifier.py
index 1f350b4e..7b6dcbe0 100644
--- a/tests/types/test_identifier.py
+++ b/tests/types/test_identifier.py
@@ -4,59 +4,58 @@
from pydantic import BaseModel, ValidationError
from pytest import MonkeyPatch
-from mex.common.types import Identifier
+from mex.common.types import IDENTIFIER_PATTERN, Identifier
-class DummyID(Identifier):
+class DummyIdentifier(Identifier):
pass
class DummyModel(BaseModel):
- id: Identifier
- dummy: DummyID | None = None
+ id: DummyIdentifier
-def test_identifier_validates() -> None:
- model_with_obj = DummyModel.model_validate({"id": Identifier("bFQoRhcVH5DIfZ")})
- model_with_raw = DummyModel.model_validate({"id": "bFQoRhcVH5DIfZ"})
- model_with_raw_uuid = DummyModel.model_validate(
- {"id": "00000000-0000-4000-8000-000000000539"}
- )
- model_with_uuid_obj = DummyModel.model_validate({"id": UUID(int=1337, version=4)})
-
- assert (
- model_with_obj.id
- == model_with_raw.id
- == model_with_raw_uuid.id
- == model_with_uuid_obj.id
- == Identifier.generate(seed=1337)
- )
+def test_identifier_validation() -> None:
+ model = DummyModel.model_validate({"id": "bFQoRhcVH5DIfZ"})
+ assert model.id == DummyIdentifier("bFQoRhcVH5DIfZ")
+
+ model = DummyModel.model_validate({"id": DummyIdentifier("bFQoRhcVH5DIfZ")})
+ assert model.id == DummyIdentifier("bFQoRhcVH5DIfZ")
+
+ model = DummyModel(id=DummyIdentifier("bFQoRhcVH5DIfZ"))
+ assert model.id == DummyIdentifier("bFQoRhcVH5DIfZ")
with pytest.raises(ValidationError):
DummyModel.model_validate({"id": "baaiaaaboi!!!"})
with pytest.raises(ValidationError):
- DummyModel.model_validate({"id": 42})
+ DummyModel.model_validate({"id": object()})
-def test_identifier_modifies_schema() -> None:
- assert DummyModel.model_json_schema()["properties"]["id"] == {
- "title": "Identifier",
- "type": "string",
- "pattern": r"^[a-zA-Z0-9]{14,22}$",
- }
- assert DummyModel.model_json_schema()["properties"]["dummy"] == {
- "anyOf": [
- {"pattern": "^[a-zA-Z0-9]{14,22}$", "title": "DummyID", "type": "string"},
- {"type": "null"},
- ],
- "default": None,
- "title": "Dummy",
+def test_identifier_serialization() -> None:
+ model = DummyModel(id=DummyIdentifier("bFQoRhcVH5DIfZ"))
+ raw = model.model_dump()
+
+ assert raw == {"id": "bFQoRhcVH5DIfZ"}
+
+
+def test_identifier_schema() -> None:
+ assert DummyModel.model_json_schema() == {
+ "properties": {
+ "id": {
+ "pattern": IDENTIFIER_PATTERN,
+ "title": "DummyIdentifier",
+ "type": "string",
+ }
+ },
+ "required": ["id"],
+ "title": "DummyModel",
+ "type": "object",
}
def test_identifier_repr() -> None:
- assert repr(Identifier("baaiaaaaaaaboi")) == "Identifier('baaiaaaaaaaboi')"
+ assert repr(Identifier("baaiaaaaaaaboi")) == 'Identifier("baaiaaaaaaaboi")'
def test_identifier_generate(monkeypatch: MonkeyPatch) -> None:
diff --git a/tests/types/test_link.py b/tests/types/test_link.py
index 8e96d5c5..000c2866 100644
--- a/tests/types/test_link.py
+++ b/tests/types/test_link.py
@@ -1,56 +1,38 @@
-from pydantic import BaseModel
+import pytest
+from pydantic import BaseModel, ValidationError
from mex.common.types import Link, LinkLanguage
-def test_parsing_from_string() -> None:
- class DummyModel(BaseModel):
- link: Link
+class DummyModel(BaseModel):
+ link: Link
- # plain link
- model = DummyModel.model_validate({"link": "https://example.com"})
- assert model.model_dump(exclude_none=True) == {
- "link": {"url": "https://example.com"}
- }
- # link with title
- model = DummyModel.model_validate({"link": "[Example](https://example.com)"})
- assert model.model_dump(exclude_none=True) == {
- "link": {"url": "https://example.com", "title": "Example"}
- }
+def test_link_validation() -> None:
+ with pytest.raises(ValidationError, match="Allowed input types are dict and str"):
+ _ = DummyModel.model_validate({"link": 1})
- # link with funky characters
- model = DummyModel.model_validate(
- {"link": r"[\[TEST\] Example](https://example.com/test?q=\(\.\*\))"}
- )
- assert model.model_dump(exclude_none=True) == {
- "link": {"url": "https://example.com/test?q=(.*)", "title": "[TEST] Example"}
+ model = DummyModel.model_validate({"link": "https://example.com"})
+ assert model.model_dump() == {
+ "link": {
+ "language": None,
+ "title": None,
+ "url": "https://example.com",
+ }
}
- # nested model
model = DummyModel.model_validate(
{"link": {"url": "https://example.com", "title": "Example", "language": "en"}}
)
- assert model.model_dump(exclude_none=True) == {
+ assert model.model_dump() == {
"link": {
- "url": "https://example.com",
- "title": "Example",
"language": LinkLanguage.EN,
+ "title": "Example",
+ "url": "https://example.com",
}
}
-def test_rendering_as_string() -> None:
- # plain link
- link = Link.model_validate({"url": "https://example.com"})
- assert str(link) == "https://example.com"
-
- # link with title
- link = Link.model_validate({"url": "https://example.com", "title": "Example"})
- assert str(link) == r"[Example](https://example\.com)"
-
- # link with funky characters
- link = Link.model_validate(
- {"url": "https://example.com/test?q=(.*)", "title": "[TEST] Example"}
- )
- assert str(link) == r"[\[TEST\] Example](https://example\.com/test?q=\(\.\*\))"
+def test_link_hash() -> None:
+ link = Link(url="https://foo.bar", title="Hallo Welt.", language=LinkLanguage.DE)
+ assert hash(link) == hash(("https://foo.bar", "Hallo Welt.", LinkLanguage.DE))
diff --git a/tests/types/test_temporal_entity.py b/tests/types/test_temporal_entity.py
index 3f3f18be..53d7dd42 100644
--- a/tests/types/test_temporal_entity.py
+++ b/tests/types/test_temporal_entity.py
@@ -103,15 +103,6 @@ def test_temporal_entity_value_errors(
cls(*args, **kwargs)
-@pytest.mark.parametrize(
- ("value", "message"),
- [(object(), "Cannot parse as TemporalEntity")],
-)
-def test_temporal_entity_validation_errors(value: Any, message: str) -> None:
- with pytest.raises(TypeError, match=message):
- TemporalEntity.validate(value)
-
-
@pytest.mark.parametrize(
("cls", "args", "kwargs", "expected"),
[
@@ -271,10 +262,28 @@ def test_temporal_entity_repr() -> None:
)
-def test_temporal_entity_serialization() -> None:
- class Person(BaseModel):
- birthday: YearMonthDay
+class DummyModel(BaseModel):
+ birthday: YearMonthDay
- person = Person.model_validate({"birthday": "24th July 1999"})
+
+def test_temporal_entity_schema() -> None:
+ assert DummyModel.model_json_schema() == {
+ "properties": {
+ "birthday": {
+ "examples": ["2014-08-24"],
+ "format": "date",
+ "pattern": "^\\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$",
+ "title": "YearMonthDay",
+ "type": "string",
+ }
+ },
+ "required": ["birthday"],
+ "title": "DummyModel",
+ "type": "object",
+ }
+
+
+def test_temporal_entity_serialization() -> None:
+ person = DummyModel.model_validate({"birthday": "24th July 1999"})
assert person.model_dump_json() == '{"birthday":"1999-07-24"}'
diff --git a/tests/types/test_text.py b/tests/types/test_text.py
index 494814f1..f9f686d6 100644
--- a/tests/types/test_text.py
+++ b/tests/types/test_text.py
@@ -33,23 +33,27 @@ def test_text_language_detect() -> None:
assert none_text.language is None
-def test_parsing_from_string() -> None:
- class DummyModel(BaseModel):
- text: Text
+class DummyModel(BaseModel):
+ text: Text
+
+
+def test_text_validation() -> None:
+ with pytest.raises(ValidationError, match="Allowed input types are dict and str"):
+ _ = DummyModel.model_validate({"text": 1})
model = DummyModel.model_validate({"text": "we are parsing a string here"})
assert model.model_dump() == {
"text": {"value": "we are parsing a string here", "language": TextLanguage.EN}
}
- with pytest.raises(ValidationError):
- _ = DummyModel.model_validate({"text": 1})
-
model = DummyModel.model_validate(
- {"text": {"value": "and here, we parsing an object"}}
+ {"text": {"value": "and here, we are parsing an object"}}
)
assert model.model_dump() == {
- "text": {"value": "and here, we parsing an object", "language": TextLanguage.EN}
+ "text": {
+ "value": "and here, we are parsing an object",
+ "language": TextLanguage.EN,
+ }
}
model = DummyModel.model_validate(
@@ -68,11 +72,6 @@ class DummyModel(BaseModel):
}
-def test_text_str() -> None:
- text = Text(value="Hello world.")
- assert str(text) == "Hello world."
-
-
def test_text_hash() -> None:
text = Text(value="Hallo Welt.", language=TextLanguage.DE)
assert hash(text) == hash(("Hallo Welt.", TextLanguage.DE))
diff --git a/tests/types/test_vocabulary.py b/tests/types/test_vocabulary.py
index 52805e0d..1e2d0d40 100644
--- a/tests/types/test_vocabulary.py
+++ b/tests/types/test_vocabulary.py
@@ -35,8 +35,8 @@ class DummyEnum(VocabularyEnum):
# check enum values are loaded correctly
assert [c.value for c in DummyEnum] == [
- "https://dummy/concept-one",
- "https://dummy/concept-two",
+ "https://mex.rki.de/item/dummy-concept-1",
+ "https://mex.rki.de/item/dummy-concept-2",
]
# check enum instance representation
@@ -51,13 +51,41 @@ class DummyModel(BaseModel):
# check wrong value raises error
with pytest.raises(ValidationError):
- DummyModel.model_validate({"dummy": "https://dummy/not-a-valid-concept"})
+ DummyModel.model_validate(
+ {"dummy": "https://mex.rki.de/item/not-a-valid-concept"}
+ )
# check parsing from string works
- model = DummyModel.model_validate({"dummy": "https://dummy/concept-two"})
+ model = DummyModel.model_validate(
+ {"dummy": "https://mex.rki.de/item/dummy-concept-2"}
+ )
assert model.dummy == DummyEnum["PREF_EN_TWO"]
+@pytest.mark.usefixtures("use_dummy_vocabulary")
+def test_vocabulary_enum_schema() -> None:
+ class DummyEnum(VocabularyEnum):
+ __vocabulary__ = "dummy-vocabulary"
+
+ class DummyModel(BaseModel):
+ dummy: DummyEnum
+
+ assert DummyModel.model_json_schema() == {
+ "properties": {
+ "dummy": {
+ "examples": ["https://mex.rki.de/item/dummy-vocabulary-1"],
+ "pattern": "https://mex.rki.de/item/[a-z0-9-]+",
+ "title": "Dummy",
+ "type": "string",
+ "useScheme": "https://mex.rki.de/item/dummy-vocabulary",
+ }
+ },
+ "required": ["dummy"],
+ "title": "DummyModel",
+ "type": "object",
+ }
+
+
@pytest.mark.usefixtures("use_dummy_vocabulary")
def test_vocabulary_enum_find() -> None:
class DummyEnum(VocabularyEnum):
@@ -68,4 +96,4 @@ class DummyEnum(VocabularyEnum):
found_enum = DummyEnum.find("pref-de-one")
assert found_enum is not None
- assert found_enum.value == "https://dummy/concept-one"
+ assert found_enum.value == "https://mex.rki.de/item/dummy-concept-1"