From 93e7f0e190460b326c6086406f86c5a5d454d60c Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Tue, 21 May 2024 15:28:04 -0700 Subject: [PATCH] Add parse_timedelta function to parse durations Add a new safir.datetime.parse_timedelta function that parses a human-friendly syntax for time durations into a datetime.timedelta. The syntax was taken from semaphore and is now also used in the configuration parser for Gafaelfawr. --- changelog.d/20240521_152707_rra_DM_44444.md | 3 + docs/user-guide/datetime.rst | 38 +++++++++++ src/safir/datetime.py | 74 +++++++++++++++++++++ tests/datetime_test.py | 22 ++++++ 4 files changed, 137 insertions(+) create mode 100644 changelog.d/20240521_152707_rra_DM_44444.md diff --git a/changelog.d/20240521_152707_rra_DM_44444.md b/changelog.d/20240521_152707_rra_DM_44444.md new file mode 100644 index 00000000..491a8bb4 --- /dev/null +++ b/changelog.d/20240521_152707_rra_DM_44444.md @@ -0,0 +1,3 @@ +### New features + +- Add new function `safir.datetime.parse_timedelta`, which parses a human-friendly syntax for specifying time durations into a Python `datetime.timedelta`. diff --git a/docs/user-guide/datetime.rst b/docs/user-guide/datetime.rst index 5c96354a..c0f49e19 100644 --- a/docs/user-guide/datetime.rst +++ b/docs/user-guide/datetime.rst @@ -75,3 +75,41 @@ Safir therefore also provides `safir.datetime.format_datetime_for_logging`, whic As the name of the function indicates, this function should only be used when formatting dates for logging and other human display. Dates that may need to be parsed again by another program should use `~safir.datetime.isodatetime` instead. + +Parsing time intervals +====================== + +Pydantic by default supports specifying `datetime.timedelta` fields as either a floating-point number of seconds or as an ISO 8601 duration. +The syntax for ISO 8601 durations is unambiguous, but it's obscure and not widely used. +For example, ``P23DT23H`` represents a duration of 23 days and 23 hours. 
+ +Safir provides a function, `safir.datetime.parse_timedelta`, that parses an alternative syntax for specifying durations that's easier for humans to read and is similar to the syntax supported by other languages and libraries. +Its input is a list of numbers and duration abbreviations, optionally separated by whitespace. +The supported abbreviations are: + +- Week: ``weeks``, ``week``, ``w`` +- Day: ``days``, ``day``, ``d`` +- Hour: ``hours``, ``hour``, ``hr``, ``h`` +- Minute: ``minutes``, ``minute``, ``mins``, ``min``, ``m`` +- Second: ``seconds``, ``second``, ``secs``, ``sec``, ``s`` + +So, for example, the duration mentioned above could be given as ``23d23h`` or ``23days 23hours``. + +To accept this syntax as input for a Pydantic model, use a field validator such as the following: + +.. code-block:: python + + from pydantic import field_validator + from safir.datetime import parse_timedelta + + + @field_validator("lifetime", mode="before") + @classmethod + def _validate_lifetime( + cls, v: str | float | timedelta + ) -> float | timedelta: + if not isinstance(v, str): + return v + return parse_timedelta(v) + +This disables the built-in Pydantic support for ISO 8601 durations in favor of the syntax shown above. 
diff --git a/src/safir/datetime.py b/src/safir/datetime.py index 03a0719b..801be9a3 100644 --- a/src/safir/datetime.py +++ b/src/safir/datetime.py @@ -2,14 +2,25 @@ from __future__ import annotations +import re from datetime import UTC, datetime, timedelta from typing import overload +_TIMEDELTA_PATTERN = re.compile( + r"((?P<weeks>\d+?)\s*(weeks|week|w))?\s*" + r"((?P<days>\d+?)\s*(days|day|d))?\s*" + r"((?P<hours>\d+?)\s*(hours|hour|hr|h))?\s*" + r"((?P<minutes>\d+?)\s*(minutes|minute|mins|min|m))?\s*" + r"((?P<seconds>\d+?)\s*(seconds|second|secs|sec|s))?$" +) +"""Regular expression pattern for a time duration.""" + __all__ = [ "current_datetime", "format_datetime_for_logging", "isodatetime", "parse_isodatetime", + "parse_timedelta", ] @@ -139,3 +150,66 @@ def parse_isodatetime(time_string: str) -> datetime: if not time_string.endswith("Z"): raise ValueError(f"{time_string} does not end with Z") return datetime.fromisoformat(time_string[:-1] + "+00:00") + + +def parse_timedelta(text: str) -> timedelta: + """Parse a string into a `datetime.timedelta`. + + Expects a string consisting of one or more sequences of numbers and + duration abbreviations, separated by optional whitespace. Whitespace at + the beginning and end of the string is ignored. The supported + abbreviations are: + + - Week: ``weeks``, ``week``, ``w`` + - Day: ``days``, ``day``, ``d`` + - Hour: ``hours``, ``hour``, ``hr``, ``h`` + - Minute: ``minutes``, ``minute``, ``mins``, ``min``, ``m`` + - Second: ``seconds``, ``second``, ``secs``, ``sec``, ``s`` + + If several are present, they must be given in the above order. Example + valid strings are ``8d`` (8 days), ``4h 3minutes`` (four hours and three + minutes), and ``5w4d`` (five weeks and four days). + + This function can be used as a before-mode validator for Pydantic + `~datetime.timedelta` fields, replacing Pydantic's default ISO 8601 + duration support. + + Parameters + ---------- + text + Input string. + + Returns + ------- + datetime.timedelta + Converted `datetime.timedelta`. 
+ + Raises + ------ + ValueError + Raised if the string is not in a valid format. + + Examples + -------- + To accept a `~datetime.timedelta` in this format in a Pydantic model, use + a Pydantic field validator such as the following: + + .. code-block:: python + + @field_validator("lifetime", mode="before") + @classmethod + def _validate_lifetime( + cls, v: str | float | timedelta + ) -> float | timedelta: + if not isinstance(v, str): + return v + return parse_timedelta(v) + + This will disable the Pydantic support for ISO 8601 durations and expect + the format parsed by this function instead. + """ + m = _TIMEDELTA_PATTERN.match(text.strip()) + if m is None: + raise ValueError(f"Could not parse {text!r} as a time duration") + td_args = {k: int(v) for k, v in m.groupdict().items() if v is not None} + return timedelta(**td_args) diff --git a/tests/datetime_test.py b/tests/datetime_test.py index 2cc0d27e..6832c828 100644 --- a/tests/datetime_test.py +++ b/tests/datetime_test.py @@ -12,6 +12,7 @@ format_datetime_for_logging, isodatetime, parse_isodatetime, + parse_timedelta, ) @@ -84,3 +85,24 @@ class Test(BaseModel): json_model = Test(time=now).model_dump_json() model = Test.model_validate_json(json_model) assert format_datetime_for_logging(model.time) == expected + + +def test_parse_timedelta() -> None: + assert parse_timedelta("8d") == timedelta(days=8) + assert parse_timedelta("4h 3minutes\n") == timedelta(hours=4, minutes=3) + assert parse_timedelta("\n 5w4d") == timedelta(weeks=5, days=4) + assert parse_timedelta( + "2weeks 2days 2hours 2minutes 2seconds" + ) == timedelta(weeks=2, days=2, hours=2, minutes=2, seconds=2) + assert parse_timedelta("1week 1day 1hour 1minute 1second") == timedelta( + weeks=1, days=1, hours=1, minutes=1, seconds=1 + ) + assert parse_timedelta("4hr 5mins 6secs") == timedelta( + hours=4, minutes=5, seconds=6 + ) + assert parse_timedelta("17min 65sec") == timedelta(minutes=17, seconds=65) + + with pytest.raises(ValueError, match="Could not 
parse"): + parse_timedelta("3s4m") + with pytest.raises(ValueError, match="Could not parse"): + parse_timedelta("4weeks 36ms")