diff --git a/CHANGELOG.md b/CHANGELOG.md index 2dc7a63f..73871d42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,12 +9,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- add pattern constants for vocabs, emails, urls and ids to types module +- add regex pattern to json schema of identifier fields +- automatically add examples and useScheme to json schema of enum fields + ### Changes +- BREAKING: use `identifier` instead of `stableTargetId` to get merged item from backend +- ensure identifier unions are typed to generic `Identifier` instead of the first match + to signal that we don't actually know which of the union types is correct +- unify pydantic schema configuration for all types +- consistently parse emails, identifiers and temporals in models to their type, not str +- consistently serialize emails, ids and temporals in models to str, not their type +- make instances of Link type hashable, to harmonize them with Text models + ### Deprecated ### Removed +- drop manual examples from enum fields, because they are autogenerated now +- BREAKING: remove `MEX_ID_PATTERN` from types, in favor of `IDENTIFIER_PATTERN` +- BREAKING: make public `MEX_ID_ALPHABET` constant from identifier module private +- BREAKING: remove `__str__` methods from Text and Link classes +- BREAKING: drop support for parsing UUIDs as Identifiers, this was unused +- BREAKING: drop support for parsing Links from markdown syntax, this was unused +- BREAKING: remove pydantic1-style `validate` methods from all type models +- BREAKING: remove `BackendApiConnector.post_models` in favor of `post_extracted_items` + ### Fixed ### Security @@ -22,8 +43,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.37.0] - 2024-10-01 ### Added -- added methods for extracting persons by name or ID from ldap +- added methods for extracting persons by name or ID from ldap - `contains_only_types` to check if fields are annotated as 
desired - `group_fields_by_class_name` utility to simplify filtered model/field lookups - new parameters to `get_inner_types` to customize what to unpack diff --git a/mex/common/backend_api/connector.py b/mex/common/backend_api/connector.py index 4db67cfe..94731488 100644 --- a/mex/common/backend_api/connector.py +++ b/mex/common/backend_api/connector.py @@ -1,4 +1,3 @@ -from typing import cast from urllib.parse import urljoin from requests.exceptions import HTTPError @@ -19,7 +18,6 @@ AnyRuleSetResponse, ) from mex.common.settings import BaseSettings -from mex.common.types import AnyExtractedIdentifier class BackendApiConnector(HTTPConnector): @@ -41,27 +39,6 @@ def _set_url(self) -> None: settings = BaseSettings.get() self.url = urljoin(str(settings.backend_api_url), self.API_VERSION) - def post_models( - self, - extracted_items: list[AnyExtractedModel], - ) -> list[AnyExtractedIdentifier]: - """Post extracted models to the backend in bulk. - - Args: - extracted_items: Extracted models to post - - Raises: - HTTPError: If post was not accepted, crashes or times out - - Returns: - Identifiers of posted extracted models - """ - # XXX deprecated method, please use `post_extracted_models` instead - return cast( - list[AnyExtractedIdentifier], - self.post_extracted_items(extracted_items).identifiers, - ) - def post_extracted_items( self, extracted_items: list[AnyExtractedModel], @@ -141,7 +118,6 @@ def fetch_merged_items( Returns: One page of merged items and the total count that was matched """ - # XXX this endpoint will only return faux merged items for now (MX-1382) response = self.request( method="GET", endpoint="merged-item", @@ -156,12 +132,12 @@ def fetch_merged_items( def get_merged_item( self, - stable_target_id: str, + identifier: str, ) -> AnyMergedModel: - """Return one merged item for the given `stableTargetId`. + """Return one merged item for the given `identifier`. 
Args: - stable_target_id: The merged item's identifier + identifier: The merged item's identifier Raises: MExError: If no merged item was found @@ -174,7 +150,7 @@ def get_merged_item( method="GET", endpoint="merged-item", params={ - "stableTargetId": stable_target_id, + "identifier": identifier, "limit": "1", }, ) @@ -201,7 +177,6 @@ def preview_merged_item( Returns: A single merged item """ - # XXX experimental method until the backend has a preview endpoint (MX-1406) response = self.request( method="GET", endpoint=f"preview-item/{stable_target_id}", @@ -224,7 +199,6 @@ def get_rule_set( Returns: A set of three rules """ - # XXX experimental method until the backend has a rule-set endpoint (MX-1416) response = self.request( method="GET", endpoint=f"rule-set/{stable_target_id}", diff --git a/mex/common/models/access_platform.py b/mex/common/models/access_platform.py index e068952c..38b0daf8 100644 --- a/mex/common/models/access_platform.py +++ b/mex/common/models/access_platform.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field, computed_field +from pydantic import AfterValidator, Field, computed_field from mex.common.models.base.extracted_data import ExtractedData from mex.common.models.base.merged_item import MergedItem @@ -16,6 +16,7 @@ from mex.common.types import ( APIType, ExtractedAccessPlatformIdentifier, + Identifier, Link, MergedAccessPlatformIdentifier, MergedContactPointIdentifier, @@ -36,9 +37,12 @@ class _Stem(BaseModel): class _OptionalLists(_Stem): alternativeTitle: list[Text] = [] contact: list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ] = [] description: list[Text] = [] landingPage: list[Link] = [] @@ -48,39 +52,23 @@ class _OptionalLists(_Stem): class _OptionalValues(_Stem): endpointDescription: Link | 
None = None - endpointType: ( - Annotated[APIType, Field(examples=["https://mex.rki.de/item/api-type-1"])] - | None - ) = None + endpointType: APIType | None = None endpointURL: Link | None = None class _RequiredValues(_Stem): - technicalAccessibility: Annotated[ - TechnicalAccessibility, - Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]), - ] + technicalAccessibility: TechnicalAccessibility class _SparseValues(_Stem): - technicalAccessibility: Annotated[ - TechnicalAccessibility | None, - Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]), - ] = None + technicalAccessibility: TechnicalAccessibility | None = None class _VariadicValues(_Stem): endpointDescription: list[Link] - endpointType: list[ - Annotated[APIType, Field(examples=["https://mex.rki.de/item/api-type-1"])] - ] = [] + endpointType: list[APIType] = [] endpointURL: list[Link] = [] - technicalAccessibility: list[ - Annotated[ - TechnicalAccessibility, - Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]), - ] - ] = [] + technicalAccessibility: list[TechnicalAccessibility] = [] class BaseAccessPlatform(_OptionalLists, _OptionalValues, _RequiredValues): diff --git a/mex/common/models/activity.py b/mex/common/models/activity.py index 8ed2de24..03e1bd25 100644 --- a/mex/common/models/activity.py +++ b/mex/common/models/activity.py @@ -5,7 +5,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field, computed_field +from pydantic import AfterValidator, Field, computed_field from mex.common.models.base.extracted_data import ExtractedData from mex.common.models.base.merged_item import MergedItem @@ -19,6 +19,7 @@ from mex.common.types import ( ActivityType, ExtractedActivityIdentifier, + Identifier, Link, MergedActivityIdentifier, MergedContactPointIdentifier, @@ -39,15 +40,16 @@ class _Stem(BaseModel): class _OptionalLists(_Stem): abstract: list[Text] = [] - activityType: list[ - Annotated[ - ActivityType, 
Field(examples=["https://mex.rki.de/item/activity-type-1"]) - ] - ] = [] + activityType: list[ActivityType] = [] alternativeTitle: list[Text] = [] documentation: list[Link] = [] end: list[YearMonthDay | YearMonth] = [] - externalAssociate: list[MergedOrganizationIdentifier | MergedPersonIdentifier] = [] + externalAssociate: list[ + Annotated[ + MergedOrganizationIdentifier | MergedPersonIdentifier, + AfterValidator(Identifier), + ] + ] = [] funderOrCommissioner: list[MergedOrganizationIdentifier] = [] fundingProgram: list[str] = [] involvedPerson: list[MergedPersonIdentifier] = [] @@ -57,18 +59,19 @@ class _OptionalLists(_Stem): shortName: list[Text] = [] start: list[YearMonthDay | YearMonth] = [] succeeds: list[MergedActivityIdentifier] = [] - theme: list[ - Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])] - ] = [] + theme: list[Theme] = [] website: list[Link] = [] class _RequiredLists(_Stem): contact: Annotated[ list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier, + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ], Field(min_length=1), ] @@ -80,9 +83,12 @@ class _RequiredLists(_Stem): class _SparseLists(_Stem): contact: list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier, + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ] = [] responsibleUnit: list[MergedOrganizationalUnitIdentifier] = [] title: list[Text] = [] diff --git a/mex/common/models/consent.py b/mex/common/models/consent.py new file mode 100644 index 00000000..3e62ee0b --- /dev/null +++ b/mex/common/models/consent.py @@ -0,0 +1,3 @@ +# XXX this is a forward-compatibility hint for feature/model-update-v3: +# when this gets merged with model v3, remove the +# `Annotated[..., 
Field(examples=["https://mex..."])]` from all enum fields diff --git a/mex/common/models/distribution.py b/mex/common/models/distribution.py index 295374e8..16bac711 100644 --- a/mex/common/models/distribution.py +++ b/mex/common/models/distribution.py @@ -59,26 +59,13 @@ class _OptionalValues(_Stem): accessService: MergedAccessPlatformIdentifier | None = None accessURL: Link | None = None downloadURL: Link | None = None - license: ( - Annotated[License, Field(examples=["https://mex.rki.de/item/license-1"])] | None - ) = None - mediaType: ( - Annotated[ - MIMEType, - Field( - examples=["https://mex.rki.de/item/mime-type-1"], - ), - ] - | None - ) = None + license: License | None = None + mediaType: MIMEType | None = None modified: YearMonthDayTime | YearMonthDay | YearMonth | None = None class _RequiredValues(_Stem): - accessRestriction: Annotated[ - AccessRestriction, - Field(examples=["https://mex.rki.de/item/access-restriction-1"]), - ] + accessRestriction: AccessRestriction issued: YearMonthDayTime | YearMonthDay | YearMonth title: Annotated[ str, @@ -90,13 +77,7 @@ class _RequiredValues(_Stem): class _SparseValues(_Stem): - accessRestriction: ( - Annotated[ - AccessRestriction, - Field(examples=["https://mex.rki.de/item/access-restriction-1"]), - ] - | None - ) = None + accessRestriction: AccessRestriction | None = None issued: YearMonthDayTime | YearMonthDay | YearMonth | None = None title: ( Annotated[ @@ -111,12 +92,7 @@ class _SparseValues(_Stem): class _VariadicValues(_Stem): - accessRestriction: list[ - Annotated[ - AccessRestriction, - Field(examples=["https://mex.rki.de/item/access-restriction-1"]), - ] - ] = [] + accessRestriction: list[AccessRestriction] = [] issued: list[YearMonthDayTime | YearMonthDay | YearMonth] = [] title: list[ Annotated[ diff --git a/mex/common/models/primary_source.py b/mex/common/models/primary_source.py index f96f307c..0cee5692 100644 --- a/mex/common/models/primary_source.py +++ b/mex/common/models/primary_source.py @@ 
-2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field, computed_field +from pydantic import AfterValidator, Field, computed_field from mex.common.models.base.extracted_data import ExtractedData from mex.common.models.base.merged_item import MergedItem @@ -15,6 +15,7 @@ ) from mex.common.types import ( ExtractedPrimarySourceIdentifier, + Identifier, Link, MergedContactPointIdentifier, MergedOrganizationalUnitIdentifier, @@ -33,9 +34,12 @@ class _Stem(BaseModel): class _OptionalLists(_Stem): alternativeTitle: list[Text] = [] contact: list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ] = [] description: list[Text] = [] documentation: list[Link] = [] diff --git a/mex/common/models/resource.py b/mex/common/models/resource.py index 43df13a6..d7826368 100644 --- a/mex/common/models/resource.py +++ b/mex/common/models/resource.py @@ -2,7 +2,7 @@ from typing import Annotated, ClassVar, Literal -from pydantic import Field, computed_field +from pydantic import AfterValidator, Field, computed_field from mex.common.models.base.extracted_data import ExtractedData from mex.common.models.base.merged_item import MergedItem @@ -19,6 +19,7 @@ DataProcessingState, ExtractedResourceIdentifier, Frequency, + Identifier, Language, License, Link, @@ -47,14 +48,7 @@ class _Stem(BaseModel): class _OptionalLists(_Stem): accessPlatform: list[MergedAccessPlatformIdentifier] = [] alternativeTitle: list[Text] = [] - anonymizationPseudonymization: list[ - Annotated[ - AnonymizationPseudonymization, - Field( - examples=["https://mex.rki.de/item/anonymization-pseudonymization-1"] - ), - ] - ] = [] + anonymizationPseudonymization: list[AnonymizationPseudonymization] = [] contributingUnit: list[MergedOrganizationalUnitIdentifier] = [] contributor: 
list[MergedPersonIdentifier] = [] creator: list[MergedPersonIdentifier] = [] @@ -66,9 +60,7 @@ class _OptionalLists(_Stem): instrumentToolOrApparatus: list[Text] = [] isPartOf: list[MergedResourceIdentifier] = [] keyword: list[Text] = [] - language: list[ - Annotated[Language, Field(examples=["https://mex.rki.de/item/language-1"])] - ] = [] + language: list[Language] = [] loincId: list[str] = [] meshId: list[ Annotated[ @@ -85,40 +77,26 @@ class _OptionalLists(_Stem): publication: list[Link] = [] publisher: list[MergedOrganizationIdentifier] = [] qualityInformation: list[Text] = [] - resourceTypeGeneral: list[ - Annotated[ - ResourceTypeGeneral, - Field( - examples=["https://mex.rki.de/item/resource-type-general-1"], - ), - ] - ] = [] + resourceTypeGeneral: list[ResourceTypeGeneral] = [] resourceTypeSpecific: list[Text] = [] rights: list[Text] = [] spatial: list[Text] = [] - stateOfDataProcessing: list[ - Annotated[ - DataProcessingState, - Field( - examples=["https://mex.rki.de/item/data-processing-state-1"], - ), - ] - ] = [] + stateOfDataProcessing: list[DataProcessingState] = [] class _RequiredLists(_Stem): contact: Annotated[ list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ], Field(min_length=1), ] - theme: Annotated[ - list[Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])]], - Field(min_length=1), - ] + theme: Annotated[list[Theme], Field(min_length=1)] title: Annotated[list[Text], Field(min_length=1)] unitInCharge: Annotated[ list[MergedOrganizationalUnitIdentifier], Field(min_length=1) @@ -127,26 +105,22 @@ class _RequiredLists(_Stem): class _SparseLists(_Stem): contact: list[ - MergedOrganizationalUnitIdentifier - | MergedPersonIdentifier - | MergedContactPointIdentifier - ] = [] - theme: list[ - Annotated[Theme, 
Field(examples=["https://mex.rki.de/item/theme-1"])] + Annotated[ + MergedOrganizationalUnitIdentifier + | MergedPersonIdentifier + | MergedContactPointIdentifier, + AfterValidator(Identifier), + ] ] = [] + theme: list[Theme] = [] title: list[Text] = [] unitInCharge: list[MergedOrganizationalUnitIdentifier] = [] class _OptionalValues(_Stem): - accrualPeriodicity: ( - Annotated[Frequency, Field(examples=["https://mex.rki.de/item/frequency-1"])] - | None - ) = None + accrualPeriodicity: Frequency | None = None created: YearMonthDayTime | YearMonthDay | YearMonth | None = None - license: ( - Annotated[License, Field(examples=["https://mex.rki.de/item/license-1"])] | None - ) = None + license: License | None = None modified: YearMonthDayTime | YearMonthDay | YearMonth | None = None sizeOfDataBasis: str | None = None temporal: ( @@ -170,42 +144,18 @@ class _OptionalValues(_Stem): class _RequiredValues(_Stem): - accessRestriction: Annotated[ - AccessRestriction, - Field( - examples=["https://mex.rki.de/item/access-restriction-1"], - ), - ] + accessRestriction: AccessRestriction class _SparseValues(_Stem): - accessRestriction: ( - Annotated[ - AccessRestriction, - Field( - examples=["https://mex.rki.de/item/access-restriction-1"], - ), - ] - | None - ) = None + accessRestriction: AccessRestriction | None = None class _VariadicValues(_Stem): - accessRestriction: list[ - Annotated[ - AccessRestriction, - Field( - examples=["https://mex.rki.de/item/access-restriction-1"], - ), - ] - ] = [] - accrualPeriodicity: list[ - Annotated[Frequency, Field(examples=["https://mex.rki.de/item/frequency-1"])] - ] = [] + accessRestriction: list[AccessRestriction] = [] + accrualPeriodicity: list[Frequency] = [] created: list[YearMonthDayTime | YearMonthDay | YearMonth] = [] - license: list[ - Annotated[License, Field(examples=["https://mex.rki.de/item/license-1"])] - ] = [] + license: list[License] = [] modified: list[YearMonthDayTime | YearMonthDay | YearMonth] = [] sizeOfDataBasis: 
list[str] = [] temporal: list[ diff --git a/mex/common/models/variable.py b/mex/common/models/variable.py index 9e511edb..bd35ffb6 100644 --- a/mex/common/models/variable.py +++ b/mex/common/models/variable.py @@ -86,15 +86,7 @@ class _OptionalValues(_Stem): ] | None ) = None - dataType: ( - Annotated[ - DataType, - Field( - examples=["https://mex.rki.de/item/data-type-1"], - ), - ] - | None - ) = None + dataType: DataType | None = None class _VariadicValues(_Stem): @@ -106,14 +98,7 @@ class _VariadicValues(_Stem): ), ] ] = [] - dataType: list[ - Annotated[ - DataType, - Field( - examples=["https://mex.rki.de/item/data-type-1"], - ), - ] - ] = [] + dataType: list[DataType] = [] class BaseVariable(_OptionalLists, _RequiredLists, _OptionalValues): diff --git a/mex/common/types/__init__.py b/mex/common/types/__init__.py index 8df713b9..6465a4e9 100644 --- a/mex/common/types/__init__.py +++ b/mex/common/types/__init__.py @@ -1,8 +1,8 @@ from typing import Final, Literal, get_args -from mex.common.types.email import Email +from mex.common.types.email import EMAIL_PATTERN, Email from mex.common.types.identifier import ( - MEX_ID_PATTERN, + IDENTIFIER_PATTERN, ExtractedAccessPlatformIdentifier, ExtractedActivityIdentifier, ExtractedContactPointIdentifier, @@ -46,6 +46,7 @@ ) from mex.common.types.text import Text, TextLanguage from mex.common.types.vocabulary import ( + VOCABULARY_PATTERN, AccessRestriction, ActivityType, AnonymizationPseudonymization, @@ -77,6 +78,7 @@ "CET", "DataProcessingState", "DataType", + "EMAIL_PATTERN", "Email", "EXTRACTED_IDENTIFIER_CLASSES_BY_NAME", "EXTRACTED_IDENTIFIER_CLASSES", @@ -93,6 +95,7 @@ "ExtractedVariableGroupIdentifier", "ExtractedVariableIdentifier", "Frequency", + "IDENTIFIER_PATTERN", "Identifier", "IdentityProvider", "Language", @@ -114,7 +117,6 @@ "MergedResourceIdentifier", "MergedVariableGroupIdentifier", "MergedVariableIdentifier", - "MEX_ID_PATTERN", "MIMEType", "NESTED_MODEL_CLASSES_BY_NAME", "NESTED_MODEL_CLASSES", @@ 
-130,7 +132,9 @@ "Text", "TextLanguage", "Theme", + "URL_PATTERN", "UTC", + "VOCABULARY_PATTERN", "VocabularyEnum", "VocabularyLoader", "WorkPath", diff --git a/mex/common/types/email.py b/mex/common/types/email.py index 89942581..3a98df1b 100644 --- a/mex/common/types/email.py +++ b/mex/common/types/email.py @@ -14,7 +14,13 @@ def __get_pydantic_core_schema__( cls, source_type: Any, handler: GetCoreSchemaHandler ) -> core_schema.CoreSchema: """Modify the core schema to add the email regex.""" - return core_schema.str_schema(pattern=EMAIL_PATTERN) + return core_schema.chain_schema( + [ + core_schema.str_schema(pattern=EMAIL_PATTERN), + core_schema.no_info_plain_validator_function(cls), + ], + serialization=core_schema.to_string_ser_schema(when_used="unless-none"), + ) @classmethod def __get_pydantic_json_schema__( @@ -26,3 +32,7 @@ def __get_pydantic_json_schema__( json_schema_["format"] = "email" json_schema_["examples"] = ["info@rki.de"] return json_schema_ + + def __repr__(self) -> str: + """Overwrite the default representation.""" + return f'{self.__class__.__name__}("{self}")' diff --git a/mex/common/types/identifier.py b/mex/common/types/identifier.py index 2b6eb9b2..c2537dee 100644 --- a/mex/common/types/identifier.py +++ b/mex/common/types/identifier.py @@ -1,4 +1,3 @@ -import re import string from typing import Any, Self from uuid import UUID, uuid4 @@ -6,9 +5,8 @@ from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler, json_schema from pydantic_core import core_schema -MEX_ID_ALPHABET = string.ascii_letters + string.digits -MEX_ID_PATTERN = r"^[a-zA-Z0-9]{14,22}$" -UUID_PATTERN = r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" +_ALPHABET = string.ascii_letters + string.digits +IDENTIFIER_PATTERN = r"^[a-zA-Z0-9]{14,22}$" class Identifier(str): @@ -19,35 +17,27 @@ def generate(cls, seed: int | None = None) -> Self: """Generate a new identifier from a seed or random UUID version 4.""" # Inspired by 
https://pypi.org/project/shortuuid output = "" - alpha_len = len(MEX_ID_ALPHABET) + alpha_len = len(_ALPHABET) if seed is None: number = uuid4().int else: number = UUID(int=seed, version=4).int while number: number, digit = divmod(number, alpha_len) - output += MEX_ID_ALPHABET[digit] + output += _ALPHABET[digit] return cls(output[::-1]) - @classmethod - def validate(cls, value: Any) -> Self: - """Validate a string, UUID or Identifier.""" - if isinstance(value, str | UUID | Identifier): - value = str(value) - if re.match(MEX_ID_PATTERN, value): - return cls(value) - if re.match(UUID_PATTERN, value): - return cls.generate(seed=UUID(value).int) - raise ValueError(f"Invalid identifier format: {value}") - raise ValueError(f"Cannot parse {type(value)} as {cls.__name__}") - @classmethod def __get_pydantic_core_schema__( cls, source_type: Any, handler: GetCoreSchemaHandler ) -> core_schema.CoreSchema: """Modify the core schema to add the ID regex.""" - return core_schema.no_info_before_validator_function( - cls.validate, core_schema.str_schema(pattern=MEX_ID_PATTERN) + return core_schema.chain_schema( + [ + core_schema.str_schema(pattern=IDENTIFIER_PATTERN), + core_schema.no_info_plain_validator_function(cls), + ], + serialization=core_schema.to_string_ser_schema(when_used="unless-none"), ) @classmethod @@ -58,11 +48,12 @@ def __get_pydantic_json_schema__( json_schema_ = handler(core_schema_) json_schema_ = handler.resolve_ref_schema(json_schema_) json_schema_["title"] = cls.__name__ + json_schema_["pattern"] = IDENTIFIER_PATTERN return json_schema_ def __repr__(self) -> str: """Overwrite the default representation.""" - return f"{self.__class__.__name__}({super().__str__().__repr__()})" + return f'{self.__class__.__name__}("{self}")' # We have technically-identical subclasses of identifier types (one per entity-type). 
diff --git a/mex/common/types/link.py b/mex/common/types/link.py index a29ff7cf..7ed49d54 100644 --- a/mex/common/types/link.py +++ b/mex/common/types/link.py @@ -1,25 +1,9 @@ -import re from enum import StrEnum from typing import Annotated, Any from pydantic import BaseModel, Field, model_validator -# https://daringfireball.net/projects/markdown/syntax#backslash -MARKDOWN_SPECIAL_CHARS = r"\`*_{}[]()#+-.!" - - -def markdown_escape(string: str) -> str: - """Escape all special characters for markdown usage.""" - for char in MARKDOWN_SPECIAL_CHARS: - string = string.replace(char, f"\\{char}") - return string - - -def markdown_unescape(string: str) -> str: - """Unescape all special characters from a markdown string.""" - for char in MARKDOWN_SPECIAL_CHARS: - string = string.replace(f"\\{char}", char) - return string +URL_PATTERN = r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?" class LinkLanguage(StrEnum): @@ -32,10 +16,10 @@ class LinkLanguage(StrEnum): class Link(BaseModel): """Type class for Link objects. - Links can be parsed from nested JSON objects or from markdown strings. + Links can be parsed from nested JSON objects or from raw strings. 
Example: - Link(url="https://foo", title="Title") == Link.model_validate("[Title](https://foo)") + Link(url="http://foo.bar") == Link.model_validate("http://foo.bar") """ language: LinkLanguage | None = None @@ -43,7 +27,7 @@ class Link(BaseModel): url: Annotated[ str, Field( - pattern=r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?", + pattern=URL_PATTERN, min_length=1, examples=["https://hello-world.org", "file://S:/OE/MF4/Projekte/MEx"], json_schema_extra={"format": "uri"}, @@ -52,23 +36,14 @@ class Link(BaseModel): @model_validator(mode="before") @classmethod - def convert_markdown_to_link(cls, values: Any) -> dict[str, Any]: + def validate_strings(cls, value: Any) -> dict[str, Any]: """Convert string input to dictionary.""" - if isinstance(values, dict): - return values - if isinstance(values, str): - if match := re.match(r"\[(?P<title>.*)\]\((?P<url>.*)\)", values): - return { - key: markdown_unescape(value) - for key, value in match.groupdict().items() - } - return {"url": values} - raise ValueError(f"Allowed input types are dict and str, got {type(values)}") - - def __str__(self) -> str: - """Render the link as markdown if a title is set, otherwise as plain url.""" - if title := self.title: - title = markdown_escape(title) - url = markdown_escape(self.url) - return f"[{title}]({url})" - return self.url + if isinstance(value, str): + return {"url": value} + if isinstance(value, dict): + return value + raise ValueError(f"Allowed input types are dict and str, got {type(value)}") + + def __hash__(self) -> int: + """Return the hash of this link.""" + return hash((self.url, self.title, self.language)) diff --git a/mex/common/types/path.py b/mex/common/types/path.py index 7cc78925..6a1539d1 100644 --- a/mex/common/types/path.py +++ b/mex/common/types/path.py @@ -1,6 +1,6 @@ from os import PathLike from pathlib import Path -from typing import Any, Self, Union +from typing import Any, Union from pydantic import GetCoreSchemaHandler from pydantic_core import 
core_schema @@ -19,6 +19,19 @@ def __init__(self, path: Union[str, Path, "PathWrapper"]) -> None: path = path._path self._path = path + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: Any, handler: GetCoreSchemaHandler + ) -> core_schema.CoreSchema: + """Modify the core schema to add validation and serialization rules.""" + return core_schema.chain_schema( + [ + core_schema.is_instance_schema(str | Path | PathWrapper), + core_schema.no_info_plain_validator_function(cls), + ], + serialization=core_schema.to_string_ser_schema(when_used="unless-none"), + ) + def __fspath__(self) -> str: """Return the file system path representation.""" return self._path.__fspath__() @@ -49,37 +62,6 @@ def is_relative(self) -> bool: """True if the underlying path is relative.""" return not self._path.is_absolute() - @classmethod - def __get_pydantic_core_schema__( - cls, source_type: Any, handler: GetCoreSchemaHandler - ) -> core_schema.CoreSchema: - """Set schema to str schema.""" - from_str_schema = core_schema.chain_schema( - [ - core_schema.str_schema(), - core_schema.no_info_plain_validator_function( - cls.validate, - ), - ] - ) - from_anything_schema = core_schema.chain_schema( - [ - core_schema.no_info_plain_validator_function(cls.validate), - core_schema.is_instance_schema(PathWrapper), - ] - ) - return core_schema.json_or_python_schema( - json_schema=from_str_schema, - python_schema=from_anything_schema, - ) - - @classmethod - def validate(cls, value: Any) -> Self: - """Convert a string value to a Text instance.""" - if isinstance(value, str | Path | PathWrapper): - return cls(value) - raise ValueError(f"Cannot parse {type(value)} as {cls.__name__}") - class AssetsPath(PathWrapper): """Custom path for settings that can be absolute or relative to `assets_dir`.""" diff --git a/mex/common/types/temporal_entity.py b/mex/common/types/temporal_entity.py index 48f8c8df..3f591db0 100644 --- a/mex/common/types/temporal_entity.py +++ 
b/mex/common/types/temporal_entity.py @@ -193,27 +193,20 @@ def __get_pydantic_core_schema__( cls, source_type: Any, handler: GetCoreSchemaHandler ) -> core_schema.CoreSchema: """Modify the core schema to add validation and serialization rules.""" - from_str_schema = core_schema.chain_schema( - [ - core_schema.str_schema(pattern=cls.STR_SCHEMA_PATTERN), - core_schema.no_info_plain_validator_function( - cls.validate, - ), - ] - ) - from_anything_schema = core_schema.chain_schema( - [ - core_schema.no_info_plain_validator_function(cls.validate), - core_schema.is_instance_schema(cls), - ] - ) - serialization_schema = core_schema.plain_serializer_function_ser_schema( - lambda instance: str(instance) - ) return core_schema.json_or_python_schema( - json_schema=from_str_schema, - python_schema=from_anything_schema, - serialization=serialization_schema, + json_schema=core_schema.chain_schema( + [ + core_schema.str_schema(pattern=cls.STR_SCHEMA_PATTERN), + core_schema.no_info_plain_validator_function(cls), + ] + ), + python_schema=core_schema.chain_schema( + [ + core_schema.is_instance_schema(cls | date | str | TemporalEntity), + core_schema.no_info_plain_validator_function(cls), + ] + ), + serialization=core_schema.to_string_ser_schema(when_used="unless-none"), ) @classmethod @@ -221,17 +214,10 @@ def __get_pydantic_json_schema__( cls, core_schema_: core_schema.CoreSchema, handler: GetJsonSchemaHandler ) -> json_schema.JsonSchemaValue: """Modify the json schema to add a title, examples and an optional format.""" - json_schema = handler(core_schema_) - json_schema["title"] = cls.__name__ - json_schema.update(cls.JSON_SCHEMA_CONFIG) - return json_schema - - @classmethod - def validate(cls, value: Any) -> "TemporalEntity": - """Parse any value and try to convert it into a temporal entity.""" - if isinstance(value, cls | date | str | TemporalEntity): - return cls(value) - raise TypeError(f"Cannot parse {type(value)} as {cls.__name__}") + json_schema_ = handler(core_schema_) + 
json_schema_["title"] = cls.__name__ + json_schema_.update(cls.JSON_SCHEMA_CONFIG) + return json_schema_ @staticmethod def _parse_integers( @@ -283,23 +269,24 @@ def _parse_date( """Parse a date and assume the precision is days.""" return datetime(value.year, value.month, value.day), TemporalEntityPrecision.DAY - def __eq__(self, other: object) -> bool: + def __eq__(self, other: Any) -> bool: """Return whether the given other value is the same as this one.""" try: - other = self.validate(other) + other_temporal = TemporalEntity(other) except TypeError: return False return bool( - self.date_time == other.date_time and self.precision == other.precision + self.date_time == other_temporal.date_time + and self.precision == other_temporal.precision ) def __gt__(self, other: Any) -> bool: """Return whether the given other value is the greater than this one.""" try: - other = self.validate(other) + other_temporal = TemporalEntity(other) except TypeError: raise NotImplementedError from None - return bool(self.date_time > other.date_time) + return bool(self.date_time > other_temporal.date_time) def __str__(self) -> str: """Render temporal entity with format fitting for its precision.""" @@ -308,7 +295,7 @@ def __str__(self) -> str: ) def __repr__(self) -> str: - """Render a presentation showing this is not just a datetime.""" + """Overwrite the default representation.""" return f'{self.__class__.__name__}("{self}")' diff --git a/mex/common/types/text.py b/mex/common/types/text.py index 2f406fb8..14f55f8f 100644 --- a/mex/common/types/text.py +++ b/mex/common/types/text.py @@ -54,10 +54,6 @@ def validate_strings(cls, value: Any) -> dict[str, Any]: return value raise ValueError(f"Allowed input types are dict and str, got {type(value)}") - def __str__(self) -> str: - """Return the text value.""" - return self.value - def __hash__(self) -> int: """Return the hash of Text.""" return hash((self.value, self.language)) diff --git a/mex/common/types/vocabulary.py 
b/mex/common/types/vocabulary.py index f17b06f5..1426dcad 100644 --- a/mex/common/types/vocabulary.py +++ b/mex/common/types/vocabulary.py @@ -5,7 +5,14 @@ from importlib.resources import files from typing import TYPE_CHECKING, ClassVar, Self, Union -from pydantic import AnyUrl, BaseModel +from pydantic import ( + AnyUrl, + BaseModel, + GetCoreSchemaHandler, + GetJsonSchemaHandler, + json_schema, +) +from pydantic_core import core_schema from mex.common.utils import normalize @@ -15,6 +22,7 @@ from mex.common.types import Text MODEL_VOCABULARIES = files("mex.model.vocabularies") +VOCABULARY_PATTERN = r"https://mex.rki.de/item/[a-z0-9-]+" class BilingualText(BaseModel): @@ -71,10 +79,6 @@ class VocabularyEnum(Enum, metaclass=VocabularyLoader): __vocabulary__: ClassVar[str] __concepts__: ClassVar[list[Concept]] - def __repr__(self) -> str: - """Overwrite representation because dynamic enum names are unknown to mypy.""" - return f'{self.__class__.__name__}["{self.name}"]' - @classmethod def find(cls, search_term: Union[str, "Text"]) -> Self | None: """Get the enum instance that matches a label of the underlying concepts. 
@@ -99,12 +103,50 @@ def find(cls, search_term: Union[str, "Text"]) -> Self | None: continue if language is None: searchable_labels.extend([normalize(label.de), normalize(label.en)]) - elif language_label := label.dict().get(language.value): + elif language_label := label.model_dump().get(language.value): searchable_labels.append(normalize(language_label)) if search_term in searchable_labels: return cls(str(concept.identifier)) return None + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: object, handler: GetCoreSchemaHandler + ) -> core_schema.CoreSchema: + """Modify the core schema to add the vocabulary regex.""" + return core_schema.json_or_python_schema( + json_schema=core_schema.union_schema( + [ + core_schema.str_schema(pattern=VOCABULARY_PATTERN), + core_schema.no_info_plain_validator_function(cls), + ], + ), + python_schema=core_schema.chain_schema( + [ + core_schema.is_instance_schema(cls | str), + core_schema.no_info_plain_validator_function(cls), + ] + ), + serialization=core_schema.plain_serializer_function_ser_schema( + lambda s: s.value, + when_used="unless-none", + ), + ) + + @classmethod + def __get_pydantic_json_schema__( + cls, core_schema_: core_schema.CoreSchema, handler: GetJsonSchemaHandler + ) -> json_schema.JsonSchemaValue: + """Modify the json schema to add the scheme and an example.""" + json_schema_ = handler(core_schema_) + json_schema_["examples"] = [f"https://mex.rki.de/item/{cls.__vocabulary__}-1"] + json_schema_["useScheme"] = f"https://mex.rki.de/item/{cls.__vocabulary__}" + return json_schema_ + + def __repr__(self) -> str: + """Overwrite representation because dynamic enum names are unknown to mypy.""" + return f'{self.__class__.__name__}["{self.name}"]' + class AccessRestriction(VocabularyEnum): """The access restriction type.""" diff --git a/tests/backend_api/test_connector.py b/tests/backend_api/test_connector.py index a2de406d..083c0f38 100644 --- a/tests/backend_api/test_connector.py +++ 
b/tests/backend_api/test_connector.py @@ -133,7 +133,7 @@ def test_get_merged_item_mocked( "GET", "http://localhost:8080/v0/merged-item", { - "stableTargetId": "NGwfzG8ROsrvIiQIVDVy", + "identifier": "NGwfzG8ROsrvIiQIVDVy", "limit": "1", }, headers={ @@ -156,7 +156,7 @@ def test_get_merged_item_error_mocked(mocked_backend: MagicMock) -> None: "GET", "http://localhost:8080/v0/merged-item", { - "stableTargetId": "NGwfzG8ROsrvIiQIVDVy", + "identifier": "NGwfzG8ROsrvIiQIVDVy", "limit": "1", }, headers={ diff --git a/tests/models/test_model_schemas.py b/tests/models/test_model_schemas.py index 5e31b283..c828485c 100644 --- a/tests/models/test_model_schemas.py +++ b/tests/models/test_model_schemas.py @@ -10,7 +10,7 @@ from mex.common.models import EXTRACTED_MODEL_CLASSES, BaseModel from mex.common.transform import dromedary_to_kebab -from mex.common.types.identifier import MEX_ID_PATTERN +from mex.common.types import IDENTIFIER_PATTERN, VOCABULARY_PATTERN MEX_MODEL_ENTITIES = files("mex.model.entities") @@ -137,13 +137,11 @@ def prepare_field(field: str, obj: list[Any] | dict[str, Any]) -> None: # pop annotations that we don't compare directly but use for other comparisons title = obj.pop("title", "") # only in model (autogenerated by pydantic) - use_scheme = obj.pop("useScheme", "") # only in spec (needed to select vocabulary) - vocabulary = use_scheme.removeprefix("https://mex.rki.de/item/") # vocabulary name # align reference paths # (the paths to referenced vocabularies and types differ between the models # and the specification, so we need to make sure they match before comparing) - if obj.get("pattern") == MEX_ID_PATTERN: + if obj.get("pattern") == IDENTIFIER_PATTERN: obj.pop("pattern") obj.pop("type") if field in ("identifier", "stableTargetId"): @@ -156,9 +154,10 @@ def prepare_field(field: str, obj: list[Any] | dict[str, Any]) -> None: ) # align concept/enum annotations - # (spec uses `useScheme` to specify vocabularies and models use enums) if obj.get("$ref") 
== "/schema/entities/concept#/identifier": - obj["$ref"] = f"/schema/fields/{vocabulary}" + obj["pattern"] = VOCABULARY_PATTERN + obj["type"] = "string" + obj.pop("$ref") # make sure all refs have paths in kebab-case # (the models use the class names, whereas the spec uses kebab-case URLs) diff --git a/tests/test_settings.py b/tests/test_settings.py index 64680fdc..2a4d375e 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -88,13 +88,8 @@ class DummySettings(BaseSettings): sub_model=SubModel(sub_model_path=relative), ) - settings_dict = settings.model_dump() - assert settings_dict["non_path"] == "blablabla" - assert settings_dict["abs_work_path"] == absolute - assert settings_dict["rel_work_path"] == WorkPath(settings.work_dir / relative) - assert settings_dict["assets_path"] == AssetsPath( - absolute / "assets_dir" / relative - ) - assert settings_dict["sub_model"]["sub_model_path"] == WorkPath( - settings.work_dir / relative - ) + assert settings.non_path == "blablabla" + assert settings.abs_work_path == absolute + assert settings.rel_work_path == WorkPath(settings.work_dir / relative) + assert settings.assets_path == AssetsPath(absolute / "assets_dir" / relative) + assert settings.sub_model.sub_model_path == WorkPath(settings.work_dir / relative) diff --git a/tests/types/test_data/dummy-vocabulary.json b/tests/types/test_data/dummy-vocabulary.json index 4852a481..fd503d2d 100644 --- a/tests/types/test_data/dummy-vocabulary.json +++ b/tests/types/test_data/dummy-vocabulary.json @@ -8,8 +8,8 @@ "de": "desc-de-one", "en": "desc-en-one" }, - "identifier": "https://dummy/concept-one", - "inScheme": "https://dummy/concept", + "identifier": "https://mex.rki.de/item/dummy-concept-1", + "inScheme": "https://mex.rki.de/item/dummy-concept", "prefLabel": { "de": "pref-de-one", "en": "pref-en-one" @@ -17,8 +17,8 @@ }, { "definition": null, - "identifier": "https://dummy/concept-two", - "inScheme": "https://dummy/concept", + "identifier": 
"https://mex.rki.de/item/dummy-concept-2", + "inScheme": "https://mex.rki.de/item/dummy-concept", "prefLabel": { "de": "pref-de-two", "en": "pref-en-two" diff --git a/tests/types/test_email.py b/tests/types/test_email.py index a18eb7a4..56485bbd 100644 --- a/tests/types/test_email.py +++ b/tests/types/test_email.py @@ -1,16 +1,53 @@ import pytest from pydantic import BaseModel, ValidationError -from mex.common.types import Email +from mex.common.types import EMAIL_PATTERN, Email class DummyModel(BaseModel): email: Email -def test_email() -> None: +def test_email_validation() -> None: model = DummyModel.model_validate({"email": "wasd@def.ghi"}) - assert model.email == "wasd@def.ghi" + assert model.email == Email("wasd@def.ghi") + + model = DummyModel.model_validate({"email": Email("wasd@def.ghi")}) + assert model.email == Email("wasd@def.ghi") + + model = DummyModel(email=Email("wasd@def.ghi")) + assert model.email == Email("wasd@def.ghi") with pytest.raises(ValidationError): DummyModel.model_validate({"email": "foobar"}) + + with pytest.raises(ValidationError): + DummyModel.model_validate({"email": object()}) + + +def test_email_serialization() -> None: + model = DummyModel.model_validate({"email": "wasd@def.ghi"}) + raw = model.model_dump() + + assert raw == {"email": "wasd@def.ghi"} + + +def test_email_schema() -> None: + assert DummyModel.model_json_schema() == { + "properties": { + "email": { + "examples": ["info@rki.de"], + "format": "email", + "pattern": EMAIL_PATTERN, + "title": "Email", + "type": "string", + } + }, + "required": ["email"], + "title": "DummyModel", + "type": "object", + } + + +def test_email_repr() -> None: + assert repr(Email("wasd@def.ghi")) == 'Email("wasd@def.ghi")' diff --git a/tests/types/test_identifier.py b/tests/types/test_identifier.py index 1f350b4e..7b6dcbe0 100644 --- a/tests/types/test_identifier.py +++ b/tests/types/test_identifier.py @@ -4,59 +4,58 @@ from pydantic import BaseModel, ValidationError from pytest import 
MonkeyPatch -from mex.common.types import Identifier +from mex.common.types import IDENTIFIER_PATTERN, Identifier -class DummyID(Identifier): +class DummyIdentifier(Identifier): pass class DummyModel(BaseModel): - id: Identifier - dummy: DummyID | None = None + id: DummyIdentifier -def test_identifier_validates() -> None: - model_with_obj = DummyModel.model_validate({"id": Identifier("bFQoRhcVH5DIfZ")}) - model_with_raw = DummyModel.model_validate({"id": "bFQoRhcVH5DIfZ"}) - model_with_raw_uuid = DummyModel.model_validate( - {"id": "00000000-0000-4000-8000-000000000539"} - ) - model_with_uuid_obj = DummyModel.model_validate({"id": UUID(int=1337, version=4)}) - - assert ( - model_with_obj.id - == model_with_raw.id - == model_with_raw_uuid.id - == model_with_uuid_obj.id - == Identifier.generate(seed=1337) - ) +def test_identifier_validation() -> None: + model = DummyModel.model_validate({"id": "bFQoRhcVH5DIfZ"}) + assert model.id == DummyIdentifier("bFQoRhcVH5DIfZ") + + model = DummyModel.model_validate({"id": DummyIdentifier("bFQoRhcVH5DIfZ")}) + assert model.id == DummyIdentifier("bFQoRhcVH5DIfZ") + + model = DummyModel(id=DummyIdentifier("bFQoRhcVH5DIfZ")) + assert model.id == DummyIdentifier("bFQoRhcVH5DIfZ") with pytest.raises(ValidationError): DummyModel.model_validate({"id": "baaiaaaboi!!!"}) with pytest.raises(ValidationError): - DummyModel.model_validate({"id": 42}) + DummyModel.model_validate({"id": object()}) -def test_identifier_modifies_schema() -> None: - assert DummyModel.model_json_schema()["properties"]["id"] == { - "title": "Identifier", - "type": "string", - "pattern": r"^[a-zA-Z0-9]{14,22}$", - } - assert DummyModel.model_json_schema()["properties"]["dummy"] == { - "anyOf": [ - {"pattern": "^[a-zA-Z0-9]{14,22}$", "title": "DummyID", "type": "string"}, - {"type": "null"}, - ], - "default": None, - "title": "Dummy", +def test_identifier_serialization() -> None: + model = DummyModel(id=DummyIdentifier("bFQoRhcVH5DIfZ")) + raw = model.model_dump() + + 
assert raw == {"id": "bFQoRhcVH5DIfZ"} + + +def test_identifier_schema() -> None: + assert DummyModel.model_json_schema() == { + "properties": { + "id": { + "pattern": IDENTIFIER_PATTERN, + "title": "DummyIdentifier", + "type": "string", + } + }, + "required": ["id"], + "title": "DummyModel", + "type": "object", } def test_identifier_repr() -> None: - assert repr(Identifier("baaiaaaaaaaboi")) == "Identifier('baaiaaaaaaaboi')" + assert repr(Identifier("baaiaaaaaaaboi")) == 'Identifier("baaiaaaaaaaboi")' def test_identifier_generate(monkeypatch: MonkeyPatch) -> None: diff --git a/tests/types/test_link.py b/tests/types/test_link.py index 8e96d5c5..000c2866 100644 --- a/tests/types/test_link.py +++ b/tests/types/test_link.py @@ -1,56 +1,38 @@ -from pydantic import BaseModel +import pytest +from pydantic import BaseModel, ValidationError from mex.common.types import Link, LinkLanguage -def test_parsing_from_string() -> None: - class DummyModel(BaseModel): - link: Link +class DummyModel(BaseModel): + link: Link - # plain link - model = DummyModel.model_validate({"link": "https://example.com"}) - assert model.model_dump(exclude_none=True) == { - "link": {"url": "https://example.com"} - } - # link with title - model = DummyModel.model_validate({"link": "[Example](https://example.com)"}) - assert model.model_dump(exclude_none=True) == { - "link": {"url": "https://example.com", "title": "Example"} - } +def test_link_validation() -> None: + with pytest.raises(ValidationError, match="Allowed input types are dict and str"): + _ = DummyModel.model_validate({"link": 1}) - # link with funky characters - model = DummyModel.model_validate( - {"link": r"[\[TEST\] Example](https://example.com/test?q=\(\.\*\))"} - ) - assert model.model_dump(exclude_none=True) == { - "link": {"url": "https://example.com/test?q=(.*)", "title": "[TEST] Example"} + model = DummyModel.model_validate({"link": "https://example.com"}) + assert model.model_dump() == { + "link": { + "language": None, + "title": 
None, + "url": "https://example.com", + } } - # nested model model = DummyModel.model_validate( {"link": {"url": "https://example.com", "title": "Example", "language": "en"}} ) - assert model.model_dump(exclude_none=True) == { + assert model.model_dump() == { "link": { - "url": "https://example.com", - "title": "Example", "language": LinkLanguage.EN, + "title": "Example", + "url": "https://example.com", } } -def test_rendering_as_string() -> None: - # plain link - link = Link.model_validate({"url": "https://example.com"}) - assert str(link) == "https://example.com" - - # link with title - link = Link.model_validate({"url": "https://example.com", "title": "Example"}) - assert str(link) == r"[Example](https://example\.com)" - - # link with funky characters - link = Link.model_validate( - {"url": "https://example.com/test?q=(.*)", "title": "[TEST] Example"} - ) - assert str(link) == r"[\[TEST\] Example](https://example\.com/test?q=\(\.\*\))" +def test_link_hash() -> None: + link = Link(url="https://foo.bar", title="Hallo Welt.", language=LinkLanguage.DE) + assert hash(link) == hash(("https://foo.bar", "Hallo Welt.", LinkLanguage.DE)) diff --git a/tests/types/test_temporal_entity.py b/tests/types/test_temporal_entity.py index 3f3f18be..53d7dd42 100644 --- a/tests/types/test_temporal_entity.py +++ b/tests/types/test_temporal_entity.py @@ -103,15 +103,6 @@ def test_temporal_entity_value_errors( cls(*args, **kwargs) -@pytest.mark.parametrize( - ("value", "message"), - [(object(), "Cannot parse <class 'object'> as TemporalEntity")], -) -def test_temporal_entity_validation_errors(value: Any, message: str) -> None: - with pytest.raises(TypeError, match=message): - TemporalEntity.validate(value) - - @pytest.mark.parametrize( ("cls", "args", "kwargs", "expected"), [ @@ -271,10 +262,31 @@ def test_temporal_entity_repr() -> None: ) -def test_temporal_entity_serialization() -> None: - class Person(BaseModel): - birthday: YearMonthDay +class DummyModel(BaseModel): + birthday: 
YearMonthDay - person = Person.model_validate({"birthday": "24th July 1999"}) + +def test_temporal_entity_schema() -> None: + assert DummyModel.model_json_schema() == { + "properties": { + "birthday": { + "examples": ["2014-08-24"], + "format": "date", + "pattern": "^\\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$", + "title": "YearMonthDay", + "type": "string", + } + }, + "required": ["birthday"], + "title": "DummyModel", + "type": "object", + } + + +DummyModel.model_json_schema() + + +def test_temporal_entity_serialization() -> None: + person = DummyModel.model_validate({"birthday": "24th July 1999"}) assert person.model_dump_json() == '{"birthday":"1999-07-24"}' diff --git a/tests/types/test_text.py b/tests/types/test_text.py index 494814f1..f9f686d6 100644 --- a/tests/types/test_text.py +++ b/tests/types/test_text.py @@ -33,23 +33,27 @@ def test_text_language_detect() -> None: assert none_text.language is None -def test_parsing_from_string() -> None: - class DummyModel(BaseModel): - text: Text +class DummyModel(BaseModel): + text: Text + + +def test_text_validation() -> None: + with pytest.raises(ValidationError, match="Allowed input types are dict and str"): + _ = DummyModel.model_validate({"text": 1}) model = DummyModel.model_validate({"text": "we are parsing a string here"}) assert model.model_dump() == { "text": {"value": "we are parsing a string here", "language": TextLanguage.EN} } - with pytest.raises(ValidationError): - _ = DummyModel.model_validate({"text": 1}) - model = DummyModel.model_validate( - {"text": {"value": "and here, we parsing an object"}} + {"text": {"value": "and here, we are parsing an object"}} ) assert model.model_dump() == { - "text": {"value": "and here, we parsing an object", "language": TextLanguage.EN} + "text": { + "value": "and here, we are parsing an object", + "language": TextLanguage.EN, + } } model = DummyModel.model_validate( @@ -68,11 +72,6 @@ class DummyModel(BaseModel): } -def test_text_str() -> None: - text = 
Text(value="Hello world.") - assert str(text) == "Hello world." - - def test_text_hash() -> None: text = Text(value="Hallo Welt.", language=TextLanguage.DE) assert hash(text) == hash(("Hallo Welt.", TextLanguage.DE)) diff --git a/tests/types/test_vocabulary.py b/tests/types/test_vocabulary.py index 52805e0d..1e2d0d40 100644 --- a/tests/types/test_vocabulary.py +++ b/tests/types/test_vocabulary.py @@ -35,8 +35,8 @@ class DummyEnum(VocabularyEnum): # check enum values are loaded correctly assert [c.value for c in DummyEnum] == [ - "https://dummy/concept-one", - "https://dummy/concept-two", + "https://mex.rki.de/item/dummy-concept-1", + "https://mex.rki.de/item/dummy-concept-2", ] # check enum instance representation @@ -51,13 +51,41 @@ class DummyModel(BaseModel): # check wrong value raises error with pytest.raises(ValidationError): - DummyModel.model_validate({"dummy": "https://dummy/not-a-valid-concept"}) + DummyModel.model_validate( + {"dummy": "https://mex.rki.de/item/not-a-valid-concept"} + ) # check parsing from string works - model = DummyModel.model_validate({"dummy": "https://dummy/concept-two"}) + model = DummyModel.model_validate( + {"dummy": "https://mex.rki.de/item/dummy-concept-2"} + ) assert model.dummy == DummyEnum["PREF_EN_TWO"] +@pytest.mark.usefixtures("use_dummy_vocabulary") +def test_vocabulary_enum_schema() -> None: + class DummyEnum(VocabularyEnum): + __vocabulary__ = "dummy-vocabulary" + + class DummyModel(BaseModel): + dummy: DummyEnum + + assert DummyModel.model_json_schema() == { + "properties": { + "dummy": { + "examples": ["https://mex.rki.de/item/dummy-vocabulary-1"], + "pattern": "https://mex.rki.de/item/[a-z0-9-]+", + "title": "Dummy", + "type": "string", + "useScheme": "https://mex.rki.de/item/dummy-vocabulary", + } + }, + "required": ["dummy"], + "title": "DummyModel", + "type": "object", + } + + @pytest.mark.usefixtures("use_dummy_vocabulary") def test_vocabulary_enum_find() -> None: class DummyEnum(VocabularyEnum): @@ -68,4 +96,4 
@@ class DummyEnum(VocabularyEnum): found_enum = DummyEnum.find("pref-de-one") assert found_enum is not None - assert found_enum.value == "https://dummy/concept-one" + assert found_enum.value == "https://mex.rki.de/item/dummy-concept-1"