From e30ac0a930f05961ac8644b0005cfaee1be09447 Mon Sep 17 00:00:00 2001
From: 07pepa <“no@sharebcs.spam”>
Date: Sat, 24 Feb 2024 11:43:24 +0100
Subject: [PATCH] Add language code ISO 639-3 and ISO 639-5 and definitions and tests

* added dynamically generated literals based on pycountry
* tested all possibilities and errors exhaustively
---
 pydantic_extra_types/language_code.py | 84 +++++++++++++++++++++++++++
 tests/test_json_schema.py             | 41 +++++++++++++
 tests/test_language_codes.py          | 53 +++++++++++++++++
 3 files changed, 178 insertions(+)
 create mode 100644 pydantic_extra_types/language_code.py
 create mode 100644 tests/test_language_codes.py

diff --git a/pydantic_extra_types/language_code.py b/pydantic_extra_types/language_code.py
new file mode 100644
index 00000000..83a90a7c
--- /dev/null
+++ b/pydantic_extra_types/language_code.py
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+from typing import Any
+
+from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler
+from pydantic_core import PydanticCustomError, core_schema
+
+try:
+    import pycountry
+except ModuleNotFoundError as e:  # pragma: no cover
+    raise RuntimeError(
+        'The `language_code` module requires "pycountry" to be installed.'
+        ' You can install it with "pip install pycountry".'
+    ) from e
+
+
+class ISO639_3(str):
+    """Three-letter ISO 639-3 language code, checked against the codes listed by pycountry."""
+
+    # Sorted so the JSON schema `enum` does not depend on pycountry's iteration order.
+    # noinspection PyUnresolvedReferences
+    allowed_values_list = sorted(lang.alpha_3 for lang in pycountry.languages)
+    allowed_values = set(allowed_values_list)
+
+    @classmethod
+    def _validate(cls, __input_value: str, _: core_schema.ValidationInfo) -> ISO639_3:
+        """Accept `__input_value` only if it is in `allowed_values`; raise `PydanticCustomError` otherwise."""
+        if __input_value not in cls.allowed_values:
+            raise PydanticCustomError(
+                'ISO639_3', 'Invalid ISO 639-3 language code. See https://en.wikipedia.org/wiki/ISO_639-3'
+            )
+        return cls(__input_value)
+
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls, _: type[Any], __: GetCoreSchemaHandler
+    ) -> core_schema.AfterValidatorFunctionSchema:
+        return core_schema.with_info_after_validator_function(
+            cls._validate,
+            core_schema.str_schema(min_length=3, max_length=3),
+        )
+
+    @classmethod
+    def __get_pydantic_json_schema__(
+        cls, schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
+    ) -> dict[str, Any]:
+        json_schema = handler(schema)
+        json_schema.update({'enum': cls.allowed_values_list})
+        return json_schema
+
+
+class ISO639_5(str):
+    """Three-letter ISO 639-5 language family code, checked against the codes listed by pycountry."""
+
+    # Sorted so the JSON schema `enum` does not depend on pycountry's iteration order.
+    # noinspection PyUnresolvedReferences
+    allowed_values_list = sorted(lang.alpha_3 for lang in pycountry.language_families)
+    allowed_values = set(allowed_values_list)
+
+    @classmethod
+    def _validate(cls, __input_value: str, _: core_schema.ValidationInfo) -> ISO639_5:
+        """Accept `__input_value` only if it is in `allowed_values`; raise `PydanticCustomError` otherwise."""
+        if __input_value not in cls.allowed_values:
+            raise PydanticCustomError(
+                'ISO639_5', 'Invalid ISO 639-5 language code. See https://en.wikipedia.org/wiki/ISO_639-5'
+            )
+        return cls(__input_value)
+
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls, _: type[Any], __: GetCoreSchemaHandler
+    ) -> core_schema.AfterValidatorFunctionSchema:
+        return core_schema.with_info_after_validator_function(
+            cls._validate,
+            core_schema.str_schema(min_length=3, max_length=3),
+        )
+
+    @classmethod
+    def __get_pydantic_json_schema__(
+        cls, schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
+    ) -> dict[str, Any]:
+        json_schema = handler(schema)
+        json_schema.update({'enum': cls.allowed_values_list})
+        return json_schema
diff --git a/tests/test_json_schema.py b/tests/test_json_schema.py
index 9bc5cf0d..4d428c03 100644
--- a/tests/test_json_schema.py
+++ b/tests/test_json_schema.py
@@ -1,3 +1,4 @@
+import pycountry
 import pytest
 from pydantic import BaseModel
 
@@ -10,11 +11,17 @@
     CountryShortName,
 )
 from pydantic_extra_types.isbn import ISBN
+from pydantic_extra_types.language_code import ISO639_3, ISO639_5
 from pydantic_extra_types.mac_address import MacAddress
 from pydantic_extra_types.payment import PaymentCardNumber
 from pydantic_extra_types.pendulum_dt import DateTime
 from pydantic_extra_types.ulid import ULID
 
+languages = [lang.alpha_3 for lang in pycountry.languages]
+language_families = [lang.alpha_3 for lang in pycountry.language_families]
+languages.sort()
+language_families.sort()
+
 
 @pytest.mark.parametrize(
     'cls,expected',
@@ -200,6 +207,40 @@
                 'type': 'object',
             },
         ),
+        (
+            ISO639_3,
+            {
+                'properties': {
+                    'x': {
+                        'title': 'X',
+                        'type': 'string',
+                        'enum': languages,
+                        'maxLength': 3,
+                        'minLength': 3,
+                    }
+                },
+                'required': ['x'],
+                'title': 'Model',
+                'type': 'object',
+            },
+        ),
+        (
+            ISO639_5,
+            {
+                'properties': {
+                    'x': {
+                        'title': 'X',
+                        'type': 'string',
+                        'enum': language_families,
+                        'maxLength': 3,
+                        'minLength': 3,
+                    }
+                },
+                'required': ['x'],
+                'title': 'Model',
+                'type': 'object',
+            },
+        ),
     ],
 )
 def test_json_schema(cls, expected):
diff --git a/tests/test_language_codes.py b/tests/test_language_codes.py
new file mode 100644
index 00000000..27cc44ab
--- /dev/null
+++ b/tests/test_language_codes.py
@@ -0,0 +1,53 @@
+import re
+
+import pycountry
+import pytest
+from pydantic import BaseModel, ValidationError
+
+from pydantic_extra_types import language_code
+
+
+class ISO3CheckingModel(BaseModel):
+    lang: language_code.ISO639_3
+
+
+class ISO5CheckingModel(BaseModel):
+    lang: language_code.ISO639_5
+
+
+@pytest.mark.parametrize('lang', map(lambda lang: lang.alpha_3, pycountry.languages))
+def test_iso_ISO639_3_code_ok(lang: str):
+    model = ISO3CheckingModel(lang=lang)
+    assert model.lang == lang
+    assert model.model_dump() == {'lang': lang}  # test serialization
+
+
+@pytest.mark.parametrize('lang', map(lambda lang: lang.alpha_3, pycountry.language_families))
+def test_iso_639_5_code_ok(lang: str):
+    model = ISO5CheckingModel(lang=lang)
+    assert model.lang == lang
+    assert model.model_dump() == {'lang': lang}  # test serialization
+
+
+def test_iso3_language_fail():
+    with pytest.raises(
+        ValidationError,
+        match=re.escape(
+            '1 validation error for ISO3CheckingModel\nlang\n  '
+            'Invalid ISO 639-3 language code. '
+            "See https://en.wikipedia.org/wiki/ISO_639-3 [type=ISO639_3, input_value='LOL', input_type=str]"
+        ),
+    ):
+        ISO3CheckingModel(lang='LOL')
+
+
+def test_iso5_language_fail():
+    with pytest.raises(
+        ValidationError,
+        match=re.escape(
+            '1 validation error for ISO5CheckingModel\nlang\n  '
+            'Invalid ISO 639-5 language code. '
+            "See https://en.wikipedia.org/wiki/ISO_639-5 [type=ISO639_5, input_value='LOL', input_type=str]"
+        ),
+    ):
+        ISO5CheckingModel(lang='LOL')