Skip to content

Commit

Permalink
feature/mx-1702 update types and schemas (#291)
Browse files Browse the repository at this point in the history
# PR Context
- prep for robert-koch-institut/mex-editor#158

# Added
- add pattern constants for vocabs, emails, urls and ids to types module
- add regex pattern to json schema of identifier fields
- automatically add examples and useScheme to json schema of enum fields

# Changes
- BREAKING: use `identifier` instead of `stableTargetId` to get merged
item from backend
- ensure identifier unions are typed to generic `Identifier` instead of
the first match
- to signal that we don't actually know which of the union types is
correct
- unify pydantic schema configuration for all types
- consistently parse emails, identifiers and temporals in models to
their type, not str
- consistently serialize emails, ids and temporals in models to str, not
their type
- make instances of Link type hashable, to harmonize them with Text
models

# Removed
- drop manual examples from enum fields, because they are autogenerated
now
- BREAKING: remove `MEX_ID_PATTERN` from types, in favor of
`IDENTIFIER_PATTERN`
- BREAKING: make public `MEX_ID_ALPHABET` constant from identifier
module private
- BREAKING: remove `__str__` methods from Text and Link classes
- BREAKING: drop support for parsing UUIDs as Identifiers, this was
unused
- BREAKING: drop support for parsing Links from markdown syntax, this
was unused
- BREAKING: remove pydantic1-style `validate` methods from all type
models
- BREAKING: remove `BackendApiConnector.post_models` in favor of
`post_extracted_items`

---------

Signed-off-by: Nicolas Drebenstedt <[email protected]>
Co-authored-by: rababerladuseladim <[email protected]>
  • Loading branch information
cutoffthetop and rababerladuseladim authored Oct 11, 2024
1 parent a0f1389 commit 23982bd
Show file tree
Hide file tree
Showing 27 changed files with 415 additions and 470 deletions.
23 changes: 22 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,42 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- add pattern constants for vocabs, emails, urls and ids to types module
- add regex pattern to json schema of identifier fields
- automatically add examples and useScheme to json schema of enum fields

### Changes

- BREAKING: use `identifier` instead of `stableTargetId` to get merged item from backend
- ensure identifier unions are typed to generic `Identifier` instead of the first match
to signal that we don't actually know which of the union types is correct
- unify pydantic schema configuration for all types
- consistently parse emails, identifiers and temporals in models to their type, not str
- consistently serialize emails, ids and temporals in models to str, not their type
- make instances of Link type hashable, to harmonize them with Text models

### Deprecated

### Removed

- drop manual examples from enum fields, because they are autogenerated now
- BREAKING: remove `MEX_ID_PATTERN` from types, in favor of `IDENTIFIER_PATTERN`
- BREAKING: make public `MEX_ID_ALPHABET` constant from identifier module private
- BREAKING: remove `__str__` methods from Text and Link classes
- BREAKING: drop support for parsing UUIDs as Identifiers, this was unused
- BREAKING: drop support for parsing Links from markdown syntax, this was unused
- BREAKING: remove pydantic1-style `validate` methods from all type models
- BREAKING: remove `BackendApiConnector.post_models` in favor of `post_extracted_items`

### Fixed

### Security

## [0.37.0] - 2024-10-01

### Added
- added methods for extracting persons by name or ID from ldap

- added methods for extracting persons by name or ID from ldap
- `contains_only_types` to check if fields are annotated as desired
- `group_fields_by_class_name` utility to simplify filtered model/field lookups
- new parameters to `get_inner_types` to customize what to unpack
Expand Down
34 changes: 4 additions & 30 deletions mex/common/backend_api/connector.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from typing import cast
from urllib.parse import urljoin

from requests.exceptions import HTTPError
Expand All @@ -19,7 +18,6 @@
AnyRuleSetResponse,
)
from mex.common.settings import BaseSettings
from mex.common.types import AnyExtractedIdentifier


class BackendApiConnector(HTTPConnector):
Expand All @@ -41,27 +39,6 @@ def _set_url(self) -> None:
settings = BaseSettings.get()
self.url = urljoin(str(settings.backend_api_url), self.API_VERSION)

def post_models(
self,
extracted_items: list[AnyExtractedModel],
) -> list[AnyExtractedIdentifier]:
"""Post extracted models to the backend in bulk.
Args:
extracted_items: Extracted models to post
Raises:
HTTPError: If post was not accepted, crashes or times out
Returns:
Identifiers of posted extracted models
"""
# XXX deprecated method, please use `post_extracted_items` instead
return cast(
list[AnyExtractedIdentifier],
self.post_extracted_items(extracted_items).identifiers,
)

def post_extracted_items(
self,
extracted_items: list[AnyExtractedModel],
Expand Down Expand Up @@ -141,7 +118,6 @@ def fetch_merged_items(
Returns:
One page of merged items and the total count that was matched
"""
# XXX this endpoint will only return faux merged items for now (MX-1382)
response = self.request(
method="GET",
endpoint="merged-item",
Expand All @@ -156,12 +132,12 @@ def fetch_merged_items(

def get_merged_item(
self,
stable_target_id: str,
identifier: str,
) -> AnyMergedModel:
"""Return one merged item for the given `stableTargetId`.
"""Return one merged item for the given `identifier`.
Args:
stable_target_id: The merged item's identifier
identifier: The merged item's identifier
Raises:
MExError: If no merged item was found
Expand All @@ -174,7 +150,7 @@ def get_merged_item(
method="GET",
endpoint="merged-item",
params={
"stableTargetId": stable_target_id,
"identifier": identifier,
"limit": "1",
},
)
Expand All @@ -201,7 +177,6 @@ def preview_merged_item(
Returns:
A single merged item
"""
# XXX experimental method until the backend has a preview endpoint (MX-1406)
response = self.request(
method="GET",
endpoint=f"preview-item/{stable_target_id}",
Expand All @@ -224,7 +199,6 @@ def get_rule_set(
Returns:
A set of three rules
"""
# XXX experimental method until the backend has a rule-set endpoint (MX-1416)
response = self.request(
method="GET",
endpoint=f"rule-set/{stable_target_id}",
Expand Down
38 changes: 13 additions & 25 deletions mex/common/models/access_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Annotated, ClassVar, Literal

from pydantic import Field, computed_field
from pydantic import AfterValidator, Field, computed_field

from mex.common.models.base.extracted_data import ExtractedData
from mex.common.models.base.merged_item import MergedItem
Expand All @@ -16,6 +16,7 @@
from mex.common.types import (
APIType,
ExtractedAccessPlatformIdentifier,
Identifier,
Link,
MergedAccessPlatformIdentifier,
MergedContactPointIdentifier,
Expand All @@ -36,9 +37,12 @@ class _Stem(BaseModel):
class _OptionalLists(_Stem):
alternativeTitle: list[Text] = []
contact: list[
MergedOrganizationalUnitIdentifier
| MergedPersonIdentifier
| MergedContactPointIdentifier
Annotated[
MergedOrganizationalUnitIdentifier
| MergedPersonIdentifier
| MergedContactPointIdentifier,
AfterValidator(Identifier),
]
] = []
description: list[Text] = []
landingPage: list[Link] = []
Expand All @@ -48,39 +52,23 @@ class _OptionalLists(_Stem):

class _OptionalValues(_Stem):
endpointDescription: Link | None = None
endpointType: (
Annotated[APIType, Field(examples=["https://mex.rki.de/item/api-type-1"])]
| None
) = None
endpointType: APIType | None = None
endpointURL: Link | None = None


class _RequiredValues(_Stem):
technicalAccessibility: Annotated[
TechnicalAccessibility,
Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]),
]
technicalAccessibility: TechnicalAccessibility


class _SparseValues(_Stem):
technicalAccessibility: Annotated[
TechnicalAccessibility | None,
Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]),
] = None
technicalAccessibility: TechnicalAccessibility | None = None


class _VariadicValues(_Stem):
endpointDescription: list[Link]
endpointType: list[
Annotated[APIType, Field(examples=["https://mex.rki.de/item/api-type-1"])]
] = []
endpointType: list[APIType] = []
endpointURL: list[Link] = []
technicalAccessibility: list[
Annotated[
TechnicalAccessibility,
Field(examples=["https://mex.rki.de/item/technical-accessibility-1"]),
]
] = []
technicalAccessibility: list[TechnicalAccessibility] = []


class BaseAccessPlatform(_OptionalLists, _OptionalValues, _RequiredValues):
Expand Down
38 changes: 22 additions & 16 deletions mex/common/models/activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from typing import Annotated, ClassVar, Literal

from pydantic import Field, computed_field
from pydantic import AfterValidator, Field, computed_field

from mex.common.models.base.extracted_data import ExtractedData
from mex.common.models.base.merged_item import MergedItem
Expand All @@ -19,6 +19,7 @@
from mex.common.types import (
ActivityType,
ExtractedActivityIdentifier,
Identifier,
Link,
MergedActivityIdentifier,
MergedContactPointIdentifier,
Expand All @@ -39,15 +40,16 @@ class _Stem(BaseModel):

class _OptionalLists(_Stem):
abstract: list[Text] = []
activityType: list[
Annotated[
ActivityType, Field(examples=["https://mex.rki.de/item/activity-type-1"])
]
] = []
activityType: list[ActivityType] = []
alternativeTitle: list[Text] = []
documentation: list[Link] = []
end: list[YearMonthDay | YearMonth] = []
externalAssociate: list[MergedOrganizationIdentifier | MergedPersonIdentifier] = []
externalAssociate: list[
Annotated[
MergedOrganizationIdentifier | MergedPersonIdentifier,
AfterValidator(Identifier),
]
] = []
funderOrCommissioner: list[MergedOrganizationIdentifier] = []
fundingProgram: list[str] = []
involvedPerson: list[MergedPersonIdentifier] = []
Expand All @@ -57,18 +59,19 @@ class _OptionalLists(_Stem):
shortName: list[Text] = []
start: list[YearMonthDay | YearMonth] = []
succeeds: list[MergedActivityIdentifier] = []
theme: list[
Annotated[Theme, Field(examples=["https://mex.rki.de/item/theme-1"])]
] = []
theme: list[Theme] = []
website: list[Link] = []


class _RequiredLists(_Stem):
contact: Annotated[
list[
MergedOrganizationalUnitIdentifier
| MergedPersonIdentifier
| MergedContactPointIdentifier,
Annotated[
MergedOrganizationalUnitIdentifier
| MergedPersonIdentifier
| MergedContactPointIdentifier,
AfterValidator(Identifier),
]
],
Field(min_length=1),
]
Expand All @@ -80,9 +83,12 @@ class _RequiredLists(_Stem):

class _SparseLists(_Stem):
contact: list[
MergedOrganizationalUnitIdentifier
| MergedPersonIdentifier
| MergedContactPointIdentifier,
Annotated[
MergedOrganizationalUnitIdentifier
| MergedPersonIdentifier
| MergedContactPointIdentifier,
AfterValidator(Identifier),
]
] = []
responsibleUnit: list[MergedOrganizationalUnitIdentifier] = []
title: list[Text] = []
Expand Down
3 changes: 3 additions & 0 deletions mex/common/models/consent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# XXX this is a forward-compatibility hint for feature/model-update-v3:
# when this gets merged with model v3, remove the
# `Annotated[..., Field(examples=["https://mex..."])]` from all enum fields
34 changes: 5 additions & 29 deletions mex/common/models/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,26 +59,13 @@ class _OptionalValues(_Stem):
accessService: MergedAccessPlatformIdentifier | None = None
accessURL: Link | None = None
downloadURL: Link | None = None
license: (
Annotated[License, Field(examples=["https://mex.rki.de/item/license-1"])] | None
) = None
mediaType: (
Annotated[
MIMEType,
Field(
examples=["https://mex.rki.de/item/mime-type-1"],
),
]
| None
) = None
license: License | None = None
mediaType: MIMEType | None = None
modified: YearMonthDayTime | YearMonthDay | YearMonth | None = None


class _RequiredValues(_Stem):
accessRestriction: Annotated[
AccessRestriction,
Field(examples=["https://mex.rki.de/item/access-restriction-1"]),
]
accessRestriction: AccessRestriction
issued: YearMonthDayTime | YearMonthDay | YearMonth
title: Annotated[
str,
Expand All @@ -90,13 +77,7 @@ class _RequiredValues(_Stem):


class _SparseValues(_Stem):
accessRestriction: (
Annotated[
AccessRestriction,
Field(examples=["https://mex.rki.de/item/access-restriction-1"]),
]
| None
) = None
accessRestriction: AccessRestriction | None = None
issued: YearMonthDayTime | YearMonthDay | YearMonth | None = None
title: (
Annotated[
Expand All @@ -111,12 +92,7 @@ class _SparseValues(_Stem):


class _VariadicValues(_Stem):
accessRestriction: list[
Annotated[
AccessRestriction,
Field(examples=["https://mex.rki.de/item/access-restriction-1"]),
]
] = []
accessRestriction: list[AccessRestriction] = []
issued: list[YearMonthDayTime | YearMonthDay | YearMonth] = []
title: list[
Annotated[
Expand Down
12 changes: 8 additions & 4 deletions mex/common/models/primary_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Annotated, ClassVar, Literal

from pydantic import Field, computed_field
from pydantic import AfterValidator, Field, computed_field

from mex.common.models.base.extracted_data import ExtractedData
from mex.common.models.base.merged_item import MergedItem
Expand All @@ -15,6 +15,7 @@
)
from mex.common.types import (
ExtractedPrimarySourceIdentifier,
Identifier,
Link,
MergedContactPointIdentifier,
MergedOrganizationalUnitIdentifier,
Expand All @@ -33,9 +34,12 @@ class _Stem(BaseModel):
class _OptionalLists(_Stem):
alternativeTitle: list[Text] = []
contact: list[
MergedOrganizationalUnitIdentifier
| MergedPersonIdentifier
| MergedContactPointIdentifier
Annotated[
MergedOrganizationalUnitIdentifier
| MergedPersonIdentifier
| MergedContactPointIdentifier,
AfterValidator(Identifier),
]
] = []
description: list[Text] = []
documentation: list[Link] = []
Expand Down
Loading

0 comments on commit 23982bd

Please sign in to comment.