From 7c4a029b8c211b918b74ff1ca65259847b5a763f Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Wed, 17 Jul 2024 10:21:04 -0700 Subject: [PATCH] Add Pydantic types for Postgres and Redis DSNs Add new types `safir.pydantic.EnvAsyncPostgresDsn` and `safir.pydantic.EnvRedisDsn`, which validate PostgreSQL and Redis DSNs but rewrite them based on the environment variables set by tox-docker. Programs using these types for their configuration will therefore automatically honor tox-docker environment variables when running the test suite. `EnvAsyncPostgresDsn` also enforces that the scheme of the DSN is compatible with asyncpg and the Safir database support. --- .../20240717_101902_rra_DM_45281_queue.md | 3 + docs/_rst_epilog.rst | 1 + docs/documenteer.toml | 2 + docs/user-guide/arq.rst | 8 +- docs/user-guide/database.rst | 2 + docs/user-guide/pydantic.rst | 49 ++++++++ src/safir/pydantic.py | 93 +++++++++++++- tests/pydantic_test.py | 115 ++++++++++++++++++ 8 files changed, 270 insertions(+), 3 deletions(-) create mode 100644 changelog.d/20240717_101902_rra_DM_45281_queue.md diff --git a/changelog.d/20240717_101902_rra_DM_45281_queue.md b/changelog.d/20240717_101902_rra_DM_45281_queue.md new file mode 100644 index 00000000..517088da --- /dev/null +++ b/changelog.d/20240717_101902_rra_DM_45281_queue.md @@ -0,0 +1,3 @@ +### New features + +- Add new types `safir.pydantic.EnvAsyncPostgresDsn` and `safir.pydantic.EnvRedisDsn`, which validate PostgreSQL and Redis DSNs but rewrite them based on the environment variables set by tox-docker. Programs using these types for their configuration will therefore automatically honor tox-docker environment variables when running the test suite. `EnvAsyncPostgresDsn` also enforces that the scheme of the DSN is compatible with asyncpg and the Safir database support. diff --git a/docs/_rst_epilog.rst b/docs/_rst_epilog.rst index 64b13af6..e80423de 100644 --- a/docs/_rst_epilog.rst +++ b/docs/_rst_epilog.rst @@ -23,4 +23,5 @@ .. 
_structlog: https://www.structlog.org/en/stable/ .. _templatekit: https://templatekit.lsst.io .. _tox: https://tox.wiki/en/latest/ +.. _tox-docker: https://tox-docker.readthedocs.io/en/latest/ .. _Uvicorn: https://www.uvicorn.org/ diff --git a/docs/documenteer.toml b/docs/documenteer.toml index cb0801f9..393f02d4 100644 --- a/docs/documenteer.toml +++ b/docs/documenteer.toml @@ -25,6 +25,8 @@ nitpick_ignore = [ ["py:class", "BaseModel"], # sphinx-automodapi apparently doesn't recognize TypeAlias as an object # that should have generated documentation, even with include-all-objects. + ["py:obj", "safir.pydantic.EnvAsyncPostgresDsn"], + ["py:obj", "safir.pydantic.EnvRedisDsn"], ["py:obj", "safir.pydantic.HumanTimedelta"], ["py:obj", "safir.pydantic.SecondsTimedelta"], ] diff --git a/docs/user-guide/arq.rst b/docs/user-guide/arq.rst index 822c52f7..ea08380e 100644 --- a/docs/user-guide/arq.rst +++ b/docs/user-guide/arq.rst @@ -50,13 +50,14 @@ If your app uses a configuration system like ``pydantic.BaseSettings``, this exa from urllib.parse import urlparse from arq.connections import RedisSettings - from pydantic import Field, RedisDsn + from pydantic import Field from pydantic_settings import BaseSettings from safir.arq import ArqMode + from safir.pydantic import EnvRedisDsn class Config(BaseSettings): - arq_queue_url: RedisDsn = Field( + arq_queue_url: EnvRedisDsn = Field( "redis://localhost:6379/1", validation_alias="APP_ARQ_QUEUE_URL" ) @@ -77,6 +78,9 @@ If your app uses a configuration system like ``pydantic.BaseSettings``, this exa ) return redis_settings +The `safir.pydantic.EnvRedisDsn` type will automatically incorporate Redis location information from tox-docker. +See :ref:`pydantic-dsns` for more details. 
+ Worker set up ------------- diff --git a/docs/user-guide/database.rst b/docs/user-guide/database.rst index 7b0c6e3a..927138da 100644 --- a/docs/user-guide/database.rst +++ b/docs/user-guide/database.rst @@ -13,6 +13,8 @@ Safir uses the `asyncpg`_ PostgreSQL database driver. Database support in Safir is optional. To use it, depend on ``safir[db]`` in your pip requirements. +Also see :ref:`pydantic-dsns` for Pydantic types that help with configuring the PostgreSQL DSN. + Initializing a database ======================= diff --git a/docs/user-guide/pydantic.rst b/docs/user-guide/pydantic.rst index a745fcbe..fa58a0b9 100644 --- a/docs/user-guide/pydantic.rst +++ b/docs/user-guide/pydantic.rst @@ -5,6 +5,55 @@ Utilities for Pydantic models Several validation and configuration problems arise frequently with Pydantic models. Safir offers some utility functions to assist in solving them. +.. _pydantic-dsns: + +Configuring PostgreSQL and Redis DSNs +===================================== + +Databases and other storage services often use a :abbr:`DSN (Data Source Name)` to specify how to connect to the service. +Pydantic provides multiple pre-defined types to parse and validate those DSNs, including ones for PostgreSQL and Redis. + +Safir applications often use tox-docker_ to start local PostgreSQL and Redis servers before running tests. +tox-docker starts services on random loopback IP addresses and ports, and stores the hostname and port in standard environment variables. + +Safir provides alternative data types for PostgreSQL and Redis DSNs that behave largely the same as the Pydantic data types if the tox-docker environment variables aren't set. +If the tox-docker variables are set, their contents are used to override the hostname and port of any provided DSN with the values provided by tox-docker.
+This allows the application to get all of its configuration from environment variables at module load time without needing special code in every application to handle the tox-docker environment variables. + +For PostgreSQL DSNs, use the data type `safir.pydantic.EnvAsyncPostgresDsn` instead of `pydantic.PostgresDsn`. +This type additionally forces the scheme of the PostgreSQL DSN to either not specify the underlying library or to specify asyncpg, allowing it to work correctly with the :doc:`Safir database API <database>`. +Unlike the Pydantic type, `~safir.pydantic.EnvAsyncPostgresDsn` only supports a single host. + +For Redis DSNs, use the data type `safir.pydantic.EnvRedisDsn` instead of `pydantic.RedisDsn`. + +For example: + +.. code-block:: python + + from pydantic_settings import BaseSettings, SettingsConfigDict + from safir.pydantic import EnvAsyncPostgresDsn, EnvRedisDsn + + + class Config(BaseSettings): + database_url: EnvAsyncPostgresDsn + redis_url: EnvRedisDsn + + model_config = SettingsConfigDict( + env_prefix="EXAMPLE_", case_sensitive=False + ) + +These types only adjust DSNs initialized as normal. +They do not synthesize DSNs if none are set. +Therefore, the application will still need to set the corresponding environment variables in :file:`tox.ini` for testing purposes, although the hostname and port can be dummy values. +In this case, that would look something like: + +.. code-block:: ini + + [testenv:py] + setenv = + EXAMPLE_DATABASE_URL = postgresql://example@localhost/example + EXAMPLE_REDIS_URL = redis://localhost/0 + ..
def _validate_env_async_postgres_dsn(v: Url) -> Url:
    """Possibly adjust a PostgreSQL DSN based on environment variables.

    When run via tox and tox-docker, the PostgreSQL hostname and port will be
    randomly selected and exposed only in environment variables. Those values
    have to be patched into the database URL at runtime, since tox has no way
    of substituting them into the environment itself.

    Parameters
    ----------
    v
        DSN to possibly rewrite.

    Returns
    -------
    Url
        ``v`` itself if ``POSTGRES_5432_TCP_PORT`` is unset or empty.
        Otherwise, a new URL with the same scheme, credentials, path, query,
        and fragment, the port taken from ``POSTGRES_5432_TCP_PORT``, and the
        host taken from ``POSTGRES_HOST`` if that is set and non-empty
        (falling back to the host of ``v``, then to ``localhost``).

    Raises
    ------
    ValueError
        Raised if ``POSTGRES_5432_TCP_PORT`` is set to a non-empty value
        that is not an integer.
    """
    port = os.getenv("POSTGRES_5432_TCP_PORT")
    if not port:
        return v

    # Convert the port before building anything so that a malformed value is
    # reported with the name of the offending environment variable.
    try:
        port_number = int(port)
    except ValueError:
        msg = f"POSTGRES_5432_TCP_PORT is not a valid port number: {port!r}"
        raise ValueError(msg) from None

    # Treat an empty POSTGRES_HOST the same as an unset one, matching how the
    # port variable is handled, rather than building a URL with an empty host.
    host = os.getenv("POSTGRES_HOST") or v.unicode_host() or "localhost"

    return Url.build(
        scheme=v.scheme,
        username=v.username,
        password=v.password,
        host=host,
        port=port_number,
        # The path is passed without its leading slash; the rebuilt URL still
        # reports the path with one.
        path=v.path.lstrip("/") if v.path else v.path,
        query=v.query,
        fragment=v.fragment,
    )
def _validate_env_redis_dsn(v: Url) -> Url:
    """Possibly adjust a Redis DSN based on environment variables.

    When run via tox and tox-docker, the Redis hostname and port will be
    randomly selected and exposed only in environment variables. Those values
    have to be patched into the Redis URL at runtime, since tox has no way of
    substituting them into the environment itself.

    Parameters
    ----------
    v
        DSN to possibly rewrite.

    Returns
    -------
    Url
        ``v`` itself if ``REDIS_6379_TCP_PORT`` is unset or empty. Otherwise,
        a new URL with the same scheme, credentials, path, query, and
        fragment, the port taken from ``REDIS_6379_TCP_PORT``, and the host
        taken from ``REDIS_HOST`` if that is set and non-empty (falling back
        to the host of ``v``, then to ``localhost``).

    Raises
    ------
    ValueError
        Raised if ``REDIS_6379_TCP_PORT`` is set to a non-empty value that is
        not an integer.
    """
    port = os.getenv("REDIS_6379_TCP_PORT")
    if not port:
        return v

    # Convert the port before building anything so that a malformed value is
    # reported with the name of the offending environment variable.
    try:
        port_number = int(port)
    except ValueError:
        msg = f"REDIS_6379_TCP_PORT is not a valid port number: {port!r}"
        raise ValueError(msg) from None

    # Treat an empty REDIS_HOST the same as an unset one, matching how the
    # port variable is handled, rather than building a URL with an empty host.
    host = os.getenv("REDIS_HOST") or v.unicode_host() or "localhost"

    return Url.build(
        scheme=v.scheme,
        username=v.username,
        password=v.password,
        host=host,
        port=port_number,
        # The path is passed without its leading slash; the rebuilt URL still
        # reports the path with one.
        path=v.path.lstrip("/") if v.path else v.path,
        query=v.query,
        fragment=v.fragment,
    )
def test_env_async_postgres_dsn(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check ``EnvAsyncPostgresDsn`` with and without tox-docker variables."""

    class TestModel(BaseModel):
        dsn: EnvAsyncPostgresDsn

    # Start from a clean environment so that DSNs pass through unchanged.
    for variable in ("POSTGRES_5432_TCP_PORT", "POSTGRES_HOST"):
        monkeypatch.delenv(variable, raising=False)

    dsn = TestModel.model_validate(
        {"dsn": "postgresql://localhost:7777/some-database"}
    ).dsn
    assert dsn.scheme == "postgresql"
    assert not dsn.username
    assert not dsn.password
    assert dsn.host == "localhost"
    assert dsn.port == 7777
    assert dsn.path == "/some-database"
    assert not dsn.query

    url = (
        "postgresql+asyncpg://user:password@localhost/other"
        "?connect_timeout=10"
    )
    dsn = TestModel.model_validate({"dsn": url}).dsn
    assert dsn.scheme == "postgresql+asyncpg"
    assert dsn.username == "user"
    assert dsn.password == "password"
    assert dsn.host == "localhost"
    assert not dsn.port
    assert dsn.path == "/other"
    assert dsn.query == "connect_timeout=10"

    # Setting only the port variable overrides the port and keeps the host.
    monkeypatch.setenv("POSTGRES_5432_TCP_PORT", "8999")
    url = "postgresql://user:password@localhost/other?connect_timeout=10"
    dsn = TestModel.model_validate({"dsn": url}).dsn
    assert dsn.scheme == "postgresql"
    assert dsn.username == "user"
    assert dsn.password == "password"
    assert dsn.host == "localhost"
    assert dsn.port == 8999
    assert dsn.path == "/other"
    assert dsn.query == "connect_timeout=10"

    # With the host variable also set, both host and port are overridden.
    monkeypatch.setenv("POSTGRES_HOST", "example.com")
    dsn = TestModel.model_validate({"dsn": "postgresql://localhost/other"}).dsn
    assert dsn.scheme == "postgresql"
    assert not dsn.username
    assert not dsn.password
    assert dsn.host == "example.com"
    assert dsn.port == 8999
    assert dsn.path == "/other"
    assert not dsn.query

    # Schemes naming a driver other than asyncpg are rejected.
    with pytest.raises(ValidationError):
        TestModel.model_validate(
            {"dsn": "postgresql+psycopg2://localhost/other"}
        )


def test_env_redis_dsn(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check ``EnvRedisDsn`` with and without tox-docker variables."""

    class TestModel(BaseModel):
        dsn: EnvRedisDsn

    # Start from a clean environment so that DSNs pass through unchanged.
    for variable in ("REDIS_6379_TCP_PORT", "REDIS_HOST"):
        monkeypatch.delenv(variable, raising=False)

    full_url = "redis://user:password@example.com:7777/1"
    dsn = TestModel.model_validate({"dsn": full_url}).dsn
    assert dsn.scheme == "redis"
    assert dsn.username == "user"
    assert dsn.password == "password"
    assert dsn.host == "example.com"
    assert dsn.port == 7777
    assert dsn.path == "/1"

    # A bare host picks up the default port and path.
    dsn = TestModel.model_validate({"dsn": "redis://localhost"}).dsn
    assert dsn.scheme == "redis"
    assert not dsn.username
    assert not dsn.password
    assert dsn.host == "localhost"
    assert dsn.port == 6379
    assert dsn.path == "/0"

    # Setting only the port variable overrides the port and keeps the host.
    monkeypatch.setenv("REDIS_6379_TCP_PORT", "4567")
    dsn = TestModel.model_validate({"dsn": full_url}).dsn
    assert dsn.scheme == "redis"
    assert dsn.username == "user"
    assert dsn.password == "password"
    assert dsn.host == "example.com"
    assert dsn.port == 4567
    assert dsn.path == "/1"

    # With the host variable also set, both host and port are overridden.
    monkeypatch.setenv("REDIS_HOST", "127.12.0.1")
    dsn = TestModel.model_validate({"dsn": "redis://localhost"}).dsn
    assert dsn.scheme == "redis"
    assert not dsn.username
    assert not dsn.password
    assert dsn.host == "127.12.0.1"
    assert dsn.port == 4567
    assert dsn.path == "/0"

    # The TLS scheme is not accepted by this type.
    with pytest.raises(ValidationError):
        TestModel.model_validate({"dsn": "rediss://example.com/0"})