Skip to content

Commit

Permalink
Added LanguageAlpha2 and LanguageName types (#153)
Browse files Browse the repository at this point in the history
* Added LanguageAlpha2 type

* 📝 Add language code validation and documentation

---------

Co-authored-by: Yasser Tahiri <[email protected]>
  • Loading branch information
odelmarcelle and yezz123 authored Mar 4, 2024
1 parent 0b3191b commit f254b34
Show file tree
Hide file tree
Showing 3 changed files with 284 additions and 2 deletions.
211 changes: 210 additions & 1 deletion pydantic_extra_types/language_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

from __future__ import annotations

from typing import Any
from dataclasses import dataclass
from functools import lru_cache
from typing import Any, Union

from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler
from pydantic_core import PydanticCustomError, core_schema
Expand All @@ -18,6 +20,213 @@
)


@dataclass
class LanguageInfo:
    """Record describing a single language.

    Attributes:
        alpha2: Two-letter [ISO 639-1 alpha-2](https://en.wikipedia.org/wiki/ISO_639-1) code,
            or `None` when the language has no such code.
        alpha3: Three-letter [ISO 639-3 alpha-3](https://en.wikipedia.org/wiki/ISO_639-3) code.
        name: The language name.
    """

    # Not every language has a two-letter code, hence the nullable type.
    alpha2: Union[str, None]
    alpha3: str
    name: str


@lru_cache
def _languages() -> list[LanguageInfo]:
    """Build the list of all languages known to `pycountry`.

    The result is memoized by `lru_cache`, so the list is built on the first call only.

    Returns:
        One `LanguageInfo` per entry of `pycountry.languages`.
    """
    infos = [
        LanguageInfo(
            # Entries without an `alpha_2` attribute get `None` instead.
            alpha2=getattr(entry, 'alpha_2', None),
            alpha3=entry.alpha_3,
            name=entry.name,
        )
        for entry in pycountry.languages
    ]
    return infos


@lru_cache
def _index_by_alpha2() -> dict[str, LanguageInfo]:
    """Map each [ISO 639-1 alpha-2](https://en.wikipedia.org/wiki/ISO_639-1) code to its LanguageInfo.

    Languages whose `alpha2` is `None` are left out of the mapping.
    """
    index = {}
    for info in _languages():
        if info.alpha2 is None:
            continue
        index[info.alpha2] = info
    return index


@lru_cache
def _index_by_alpha3() -> dict[str, LanguageInfo]:
    """Map each [ISO 639-3 alpha-3](https://en.wikipedia.org/wiki/ISO_639-3) code to its LanguageInfo."""
    by_alpha3 = {info.alpha3: info for info in _languages()}
    return by_alpha3


@lru_cache
def _index_by_name() -> dict[str, LanguageInfo]:
    """Map each language name to its LanguageInfo."""
    by_name = {info.name: info for info in _languages()}
    return by_name


class LanguageAlpha2(str):
    """A language code validated against [ISO 639-1 alpha-2](https://en.wikipedia.org/wiki/ISO_639-1).

    ```py
    from pydantic import BaseModel

    from pydantic_extra_types.language_code import LanguageAlpha2


    class Movie(BaseModel):
        audio_lang: LanguageAlpha2
        subtitles_lang: LanguageAlpha2


    movie = Movie(audio_lang='de', subtitles_lang='fr')
    print(movie)
    #> audio_lang='de' subtitles_lang='fr'
    ```
    """

    @classmethod
    def _validate(cls, __input_value: str, _: core_schema.ValidationInfo) -> LanguageAlpha2:
        """Check that the given string is a known ISO 639-1 alpha-2 code.

        Args:
            __input_value: The string to check.
            _: The Pydantic ValidationInfo (unused).

        Returns:
            The input wrapped as a `LanguageAlpha2`.

        Raises:
            PydanticCustomError: If the string is not a key of the alpha-2 index.
        """
        if __input_value in _index_by_alpha2():
            return cls(__input_value)
        raise PydanticCustomError('language_alpha2', 'Invalid language alpha2 code')

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source: type[Any], handler: GetCoreSchemaHandler
    ) -> core_schema.AfterValidatorFunctionSchema:
        """Build the Pydantic CoreSchema for ISO 639-1 alpha-2 codes.

        Args:
            source: The source type.
            handler: The handler to get the CoreSchema.

        Returns:
            A string schema (with `to_lower=True`) followed by the `_validate` check.
        """
        # The string schema lowercases the input; `_validate` runs afterwards.
        lowercase_str = core_schema.str_schema(to_lower=True)
        return core_schema.with_info_after_validator_function(cls._validate, lowercase_str)

    @classmethod
    def __get_pydantic_json_schema__(
        cls, schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
    ) -> dict[str, Any]:
        """Build the JSON Schema for ISO 639-1 alpha-2 codes.

        Args:
            schema: The Pydantic CoreSchema.
            handler: The handler to get the JSON Schema.

        Returns:
            The handler's JSON Schema with a two-character `pattern` added.
        """
        json_schema = handler(schema)
        json_schema['pattern'] = r'^\w{2}$'
        return json_schema

    @property
    def alpha3(self) -> str:
        """The language code in the [ISO 639-3 alpha-3](https://en.wikipedia.org/wiki/ISO_639-3) format."""
        info = _index_by_alpha2()[self]
        return info.alpha3

    @property
    def name(self) -> str:
        """The language name."""
        info = _index_by_alpha2()[self]
        return info.name


class LanguageName(str):
    """A language name validated against the names listed in the
    [ISO 639-3 standard](https://en.wikipedia.org/wiki/ISO_639-3).

    ```py
    from pydantic import BaseModel

    from pydantic_extra_types.language_code import LanguageName


    class Movie(BaseModel):
        audio_lang: LanguageName
        subtitles_lang: LanguageName


    movie = Movie(audio_lang='Dutch', subtitles_lang='Mandarin Chinese')
    print(movie)
    #> audio_lang='Dutch' subtitles_lang='Mandarin Chinese'
    ```
    """

    @classmethod
    def _validate(cls, __input_value: str, _: core_schema.ValidationInfo) -> LanguageName:
        """Check that the given string is a known language name.

        Args:
            __input_value: The string to check.
            _: The Pydantic ValidationInfo (unused).

        Returns:
            The input wrapped as a `LanguageName`.

        Raises:
            PydanticCustomError: If the string is not a key of the name index.
        """
        if __input_value in _index_by_name():
            return cls(__input_value)
        raise PydanticCustomError('language_name', 'Invalid language name')

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source: type[Any], handler: GetCoreSchemaHandler
    ) -> core_schema.AfterValidatorFunctionSchema:
        """Build the Pydantic CoreSchema for language names.

        Args:
            source: The source type.
            handler: The handler to get the CoreSchema.

        Returns:
            A plain string schema followed by the `_validate` check, serialized as a string.
        """
        plain_str = core_schema.str_schema()
        string_serializer = core_schema.to_string_ser_schema()
        return core_schema.with_info_after_validator_function(
            cls._validate,
            plain_str,
            serialization=string_serializer,
        )

    @property
    def alpha2(self) -> Union[str, None]:
        """The language code in the [ISO 639-1 alpha-2](https://en.wikipedia.org/wiki/ISO_639-1) format. Does not exist for all languages."""
        info = _index_by_name()[self]
        return info.alpha2

    @property
    def alpha3(self) -> str:
        """The language code in the [ISO 639-3 alpha-3](https://en.wikipedia.org/wiki/ISO_639-3) format."""
        info = _index_by_name()[self]
        return info.alpha3


class ISO639_3(str):
"""ISO639_3 parses Language in the [ISO 639-3 alpha-3](https://en.wikipedia.org/wiki/ISO_639-3_alpha-3)
format.
Expand Down
20 changes: 19 additions & 1 deletion tests/test_json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)
from pydantic_extra_types.currency_code import ISO4217, Currency
from pydantic_extra_types.isbn import ISBN
from pydantic_extra_types.language_code import ISO639_3, ISO639_5
from pydantic_extra_types.language_code import ISO639_3, ISO639_5, LanguageAlpha2, LanguageName
from pydantic_extra_types.mac_address import MacAddress
from pydantic_extra_types.payment import PaymentCardNumber
from pydantic_extra_types.pendulum_dt import DateTime
Expand Down Expand Up @@ -219,6 +219,24 @@
'type': 'object',
},
),
(
LanguageAlpha2,
{
'properties': {'x': {'pattern': '^\\w{2}$', 'title': 'X', 'type': 'string'}},
'required': ['x'],
'title': 'Model',
'type': 'object',
},
),
(
LanguageName,
{
'properties': {'x': {'title': 'X', 'type': 'string'}},
'required': ['x'],
'title': 'Model',
'type': 'object',
},
),
(
ISO639_3,
{
Expand Down
55 changes: 55 additions & 0 deletions tests/test_language_codes.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,37 @@
import re
from string import printable

import pycountry
import pytest
from pydantic import BaseModel, ValidationError

from pydantic_extra_types import language_code
from pydantic_extra_types.language_code import (
LanguageAlpha2,
LanguageInfo,
LanguageName,
_index_by_alpha2,
_index_by_alpha3,
_index_by_name,
)

PARAMS_AMOUNT = 20


@pytest.fixture(scope='module', name='MovieAlpha2')
def movie_alpha2_fixture():
    """Provide a model class with a single `LanguageAlpha2` field, shared across the module."""

    # The class is returned rather than an instance; each test builds its own instances.
    class Movie(BaseModel):
        audio_lang: LanguageAlpha2

    return Movie


@pytest.fixture(scope='module', name='MovieName')
def movie_name_fixture():
    """Provide a model class with a single `LanguageName` field, shared across the module."""

    # The class is returned rather than an instance; each test builds its own instances.
    class Movie(BaseModel):
        audio_lang: LanguageName

    return Movie


class ISO3CheckingModel(BaseModel):
Expand All @@ -15,6 +42,34 @@ class ISO5CheckingModel(BaseModel):
lang: language_code.ISO639_5


@pytest.mark.parametrize('alpha2, language_data', list(_index_by_alpha2().items()))
def test_valid_alpha2(alpha2: str, language_data: LanguageInfo, MovieAlpha2):
    """Every indexed alpha-2 code validates and exposes the matching alpha-3 code and name."""
    audio_lang = MovieAlpha2(audio_lang=alpha2).audio_lang
    assert audio_lang == language_data.alpha2
    assert audio_lang.alpha3 == language_data.alpha3
    assert audio_lang.name == language_data.name


@pytest.mark.parametrize('alpha2', [*printable, *list(_index_by_alpha3())[:PARAMS_AMOUNT]])
def test_invalid_alpha2(alpha2: str, MovieAlpha2):
    """Single printable characters and a sample of alpha-3 codes are rejected as alpha-2 codes."""
    with pytest.raises(ValidationError, match='Invalid language alpha2 code'):
        MovieAlpha2(audio_lang=alpha2)


@pytest.mark.parametrize('name, language_data', list(_index_by_name().items())[:PARAMS_AMOUNT])
def test_valid_name(name: str, language_data: LanguageInfo, MovieName):
    """A sample of indexed language names validates and exposes the matching codes."""
    audio_lang = MovieName(audio_lang=name).audio_lang
    assert audio_lang == language_data.name
    assert audio_lang.alpha2 == language_data.alpha2
    assert audio_lang.alpha3 == language_data.alpha3


# 'E' and 'U' are excluded because they are valid language names.
# The remaining characters are sorted so the parametrization order is deterministic:
# iterating a set of strings varies between interpreter runs under hash randomization,
# which makes test ordering unstable and breaks collection under pytest-xdist.
@pytest.mark.parametrize('name', sorted(set(printable) - {'E', 'U'}))
def test_invalid_name(name: str, MovieName):
    """Single printable characters that are not language names are rejected."""
    with pytest.raises(ValidationError, match='Invalid language name'):
        MovieName(audio_lang=name)


@pytest.mark.parametrize('lang', map(lambda lang: lang.alpha_3, pycountry.languages))
def test_iso_ISO639_3_code_ok(lang: str):
model = ISO3CheckingModel(lang=lang)
Expand Down

0 comments on commit f254b34

Please sign in to comment.