diff --git a/pydantic_extra_types/language_code.py b/pydantic_extra_types/language_code.py
new file mode 100644
index 00000000..98d2a5df
--- /dev/null
+++ b/pydantic_extra_types/language_code.py
@@ -0,0 +1,224 @@
+from typing import Any, Literal
+
+from pydantic import ValidationInfo, WrapValidator
+from typing_extensions import Annotated
+
+
+def _not_iso_code(v: Any, next_: Any, ctx: ValidationInfo) -> Any:
+    """Run the wrapped validator and report any failure as a language-code error.
+
+    Args:
+        v: The raw value being validated.
+        next_: The wrapped validator handler; it is called with ``v`` only.
+        ctx: Validation info passed by pydantic; not used by this function.
+
+    Returns:
+        Whatever the wrapped validator returns for ``v``.
+
+    Raises:
+        ValueError: If the wrapped validator raises any exception.
+    """
+    try:
+        # The handler's optional second argument is an outer error location, not the
+        # validation info, so only the value is forwarded.
+        return next_(v)
+    except Exception:
+        # `from None` hides the inner error so only the message below is reported.
+        raise ValueError(
+            f'{v} is not a valid ISO 639-3 language code. See https://wikipedia.org/wiki/ISO_639-3.'
+        ) from None
+
+
+# Language codes accepted by `LanguageCode`, described as ISO 639-3: https://wikipedia.org/wiki/ISO_639-3
+# NOTE(review): entries such as 'alb', 'chi', 'fre' and 'ger' look like ISO 639-2/B bibliographic codes
+# (ISO 639-3 uses 'sqi', 'zho', 'fra', 'deu') - confirm which standard is intended.
+# The type is just a list of literals: https://docs.pydantic.dev/1.10/usage/types/#literal-type
+LanguageCode = Annotated[
+    Literal[
+        'aar',
+        'abk',
+        'afr',
+        'aka',
+        'alb',
+        'amh',
+        'ara',
+        'arg',
+        'arm',
+        'asm',
+        'ava',
+        'ave',
+        'aym',
+        'aze',
+        'bak',
+        'bam',
+        'baq',
+        'bel',
+        'ben',
+        'bih',
+        'bis',
+        'bos',
+        'bre',
+        'bul',
+        'bur',
+        'cat',
+        'cha',
+        'che',
+        'chi',
+        'chu',
+        'chv',
+        'cor',
+        'cos',
+        'cre',
+        'cze',
+        'dan',
+        'div',
+        'dut',
+        'dzo',
+        'eng',
+        'epo',
+        'est',
+        'ewe',
+        'fao',
+        'fij',
+        'fin',
+        'fre',
+        'fry',
+        'ful',
+        'geo',
+        'ger',
+        'gla',
+        'gle',
+        'glg',
+        'glv',
+        'gre',
+        'grn',
+        'guj',
+        'hat',
+        'hau',
+        'heb',
+        'her',
+        'hin',
+        'hmo',
+        'hrv',
+        'hun',
+        'ibo',
+        'ice',
+        'ido',
+        'iii',
+        'iku',
+        'ile',
+        'ina',
+        'ind',
+        'ipk',
+        'ita',
+        'jav',
+        'jpn',
+        'kal',
+        'kan',
+        'kas',
+        'kau',
+        'kaz',
+        'khm',
+        'kik',
+        'kin',
+        'kir',
+        'kom',
+        'kon',
+        'kor',
+        'kua',
+        'kur',
+        'lao',
+        'lat',
+        'lav',
+        'lim',
+        'lin',
+        'lit',
+        'ltz',
+        'lub',
+        'lug',
+        'mac',
+        'mah',
+        'mal',
+        'mao',
+        'mar',
+        'may',
+        'mlg',
+        'mlt',
+        'mon',
+        'nau',
+        'nav',
+        'nbl',
+        'nde',
+        'ndo',
+        'nep',
+        'nno',
+        'nob',
+        'nor',
+        'nya',
+        'oci',
+        'oji',
+        'ori',
+        'orm',
+        'oss',
+        'pan',
+        'per',
+        'pli',
+        'pol',
+        'por',
+        'pus',
+        'que',
+        'roh',
+        'rum',
+        'run',
+        'rus',
+        'sag',
+        'san',
+        'sin',
+        'slo',
+        'slv',
+        'sme',
+        'smo',
+        'sna',
+        'snd',
+        'som',
+        'sot',
+        'spa',
+        'srd',
+        'srp',
+        'ssw',
+        'sun',
+        'swa',
+        'swe',
+        'tah',
+        'tam',
+        'tat',
+        'tel',
+        'tgk',
+        'tgl',
+        'tha',
+        'tib',
+        'tir',
+        'ton',
+        'tsn',
+        'tso',
+        'tuk',
+        'tur',
+        'twi',
+        'uig',
+        'ukr',
+        'urd',
+        'uzb',
+        'ven',
+        'vie',
+        'vol',
+        'wel',
+        'wln',
+        'wol',
+        'xho',
+        'yid',
+        'yor',
+        'zha',
+        'zul',
+    ],
+    WrapValidator(_not_iso_code),
+]
diff --git a/tests/test_language_codes.py b/tests/test_language_codes.py
new file mode 100644
index 00000000..e7b6fbb0
--- /dev/null
+++ b/tests/test_language_codes.py
@@ -0,0 +1,215 @@
+import pytest
+from pydantic import BaseModel, ValidationError
+
+from pydantic_extra_types.language_code import LanguageCode
+
+
+# Minimal model with a single `LanguageCode` field, used to exercise validation.
+class CheckingModel(BaseModel):
+    lang: LanguageCode
+
+
+@pytest.mark.parametrize(
+    'lang',
+    [
+        'aar',
+        'abk',
+        'afr',
+        'aka',
+        'alb',
+        'amh',
+        'ara',
+        'arg',
+        'arm',
+        'asm',
+        'ava',
+        'ave',
+        'aym',
+        'aze',
+        'bak',
+        'bam',
+        'baq',
+        'bel',
+        'ben',
+        'bih',
+        'bis',
+        'bos',
+        'bre',
+        'bul',
+        'bur',
+        'cat',
+        'cha',
+        'che',
+        'chi',
+        'chu',
+        'chv',
+        'cor',
+        'cos',
+        'cre',
+        'cze',
+        'dan',
+        'div',
+        'dut',
+        'dzo',
+        'eng',
+        'epo',
+        'est',
+        'ewe',
+        'fao',
+        'fij',
+        'fin',
+        'fre',
+        'fry',
+        'ful',
+        'geo',
+        'ger',
+        'gla',
+        'gle',
+        'glg',
+        'glv',
+        'gre',
+        'grn',
+        'guj',
+        'hat',
+        'hau',
+        'heb',
+        'her',
+        'hin',
+        'hmo',
+        'hrv',
+        'hun',
+        'ibo',
+        'ice',
+        'ido',
+        'iii',
+        'iku',
+        'ile',
+        'ina',
+        'ind',
+        'ipk',
+        'ita',
+        'jav',
+        'jpn',
+        'kal',
+        'kan',
+        'kas',
+        'kau',
+        'kaz',
+        'khm',
+        'kik',
+        'kin',
+        'kir',
+        'kom',
+        'kon',
+        'kor',
+        'kua',
+        'kur',
+        'lao',
+        'lat',
+        'lav',
+        'lim',
+        'lin',
+        'lit',
+        'ltz',
+        'lub',
+        'lug',
+        'mac',
+        'mah',
+        'mal',
+        'mao',
+        'mar',
+        'may',
+        'mlg',
+        'mlt',
+        'mon',
+        'nau',
+        'nav',
+        'nbl',
+        'nde',
+        'ndo',
+        'nep',
+        'nno',
+        'nob',
+        'nor',
+        'nya',
+        'oci',
+        'oji',
+        'ori',
+        'orm',
+        'oss',
+        'pan',
+        'per',
+        'pli',
+        'pol',
+        'por',
+        'pus',
+        'que',
+        'roh',
+        'rum',
+        'run',
+        'rus',
+        'sag',
+        'san',
+        'sin',
+        'slo',
+        'slv',
+        'sme',
+        'smo',
+        'sna',
+        'snd',
+        'som',
+        'sot',
+        'spa',
+        'srd',
+        'srp',
+        'ssw',
+        'sun',
+        'swa',
+        'swe',
+        'tah',
+        'tam',
+        'tat',
+        'tel',
+        'tgk',
+        'tgl',
+        'tha',
+        'tib',
+        'tir',
+        'ton',
+        'tsn',
+        'tso',
+        'tuk',
+        'tur',
+        'twi',
+        'uig',
+        'ukr',
+        'urd',
+        'uzb',
+        'ven',
+        'vie',
+        'vol',
+        'wel',
+        'wln',
+        'wol',
+        'xho',
+        'yid',
+        'yor',
+        'zha',
+        'zul',
+    ],
+)
+def test_language_code_ok(lang: str):
+    """Every listed code is accepted and stored unchanged."""
+    model = CheckingModel(lang=lang)
+    assert model.lang == lang
+
+
+def test_language_fail():
+    """A value outside the list is rejected with the custom error message."""
+    with pytest.raises(
+        ValidationError,
+        match=r'1 validation error for CheckingModel\nlang\n  Value error, en-US is '
+        'not a valid ISO 639-3 language code\\. '
+        'See https://wikipedia.org/wiki/ISO_639-3\\..*',
+    ):
+        CheckingModel(lang='en-US')