Skip to content

Commit

Permalink
Merge pull request #247 from lsst-sqre/tickets/DM-44444
Browse files Browse the repository at this point in the history
DM-44444: Add parse_timedelta function to parse durations
  • Loading branch information
rra authored May 22, 2024
2 parents ad198fe + 9aa60c4 commit f68a6e0
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 0 deletions.
3 changes: 3 additions & 0 deletions changelog.d/20240521_152707_rra_DM_44444.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
### New features

- Add new function `safir.datetime.parse_timedelta`, which parses a human-friendly syntax for specifying time durations into a Python `datetime.timedelta`.
43 changes: 43 additions & 0 deletions docs/user-guide/datetime.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,46 @@ Safir therefore also provides `safir.datetime.format_datetime_for_logging`, whic

As the name of the function indicates, this function should only be used when formatting dates for logging and other human display.
Dates that may need to be parsed again by another program should use `~safir.datetime.isodatetime` instead.

Parsing time intervals
======================

Pydantic by default supports specifying `datetime.timedelta` fields as either a floating-point number of seconds or as an ISO 8601 duration.
The syntax for ISO 8601 durations is unambiguous, but it's obscure and not widely used.
For example, ``P23DT23H`` represents a duration of 23 days and 23 hours.

Safir provides a function, `safir.datetime.parse_timedelta`, that parses an alternative syntax for specifying durations that's easier for humans to read and is similar to the syntax supported by other languages and libraries.
Its input is a list of numbers and duration abbreviations, optionally separated by whitespace.
The supported abbreviations are listed below; if several units are given, they must appear in this order:

- Week: ``weeks``, ``week``, ``w``
- Day: ``days``, ``day``, ``d``
- Hour: ``hours``, ``hour``, ``hr``, ``h``
- Minute: ``minutes``, ``minute``, ``mins``, ``min``, ``m``
- Second: ``seconds``, ``second``, ``secs``, ``sec``, ``s``

So, for example, the duration mentioned above could be given as ``23d23h`` or ``23days 23hours``.

To accept this syntax as input for a Pydantic model, use a field validator such as the following:

.. code-block:: python

   from datetime import timedelta

   from pydantic import BaseModel, Field, field_validator
   from safir.datetime import parse_timedelta


   class Something(BaseModel):
       lifetime: timedelta = Field(..., title="Lifetime")

       # ... other fields

       @field_validator("lifetime", mode="before")
       @classmethod
       def _validate_lifetime(
           cls, v: str | float | timedelta
       ) -> float | timedelta:
           if not isinstance(v, str):
               return v
           return parse_timedelta(v)

This disables the built-in Pydantic support for ISO 8601 durations in favor of the syntax shown above.
74 changes: 74 additions & 0 deletions src/safir/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,25 @@

from __future__ import annotations

import re
from datetime import UTC, datetime, timedelta
from typing import overload

# Every component is optional, but components must appear in descending order
# of unit size (weeks, days, hours, minutes, seconds). Because each component
# is optional, the pattern would otherwise match the empty string, so the
# leading lookahead requires a digit (after optional whitespace) to ensure
# that at least one component is present. Only the numbers are captured, by
# name; the unit abbreviations use non-capturing groups. Longer abbreviations
# are listed before their prefixes so that the longest one is tried first.
_TIMEDELTA_PATTERN = re.compile(
    r"(?=\s*\d)"
    r"(?:(?P<weeks>\d+)\s*(?:weeks|week|w))?\s*"
    r"(?:(?P<days>\d+)\s*(?:days|day|d))?\s*"
    r"(?:(?P<hours>\d+)\s*(?:hours|hour|hr|h))?\s*"
    r"(?:(?P<minutes>\d+)\s*(?:minutes|minute|mins|min|m))?\s*"
    r"(?:(?P<seconds>\d+)\s*(?:seconds|second|secs|sec|s))?$"
)
"""Regular expression pattern for a time duration."""

__all__ = [
"current_datetime",
"format_datetime_for_logging",
"isodatetime",
"parse_isodatetime",
"parse_timedelta",
]


Expand Down Expand Up @@ -139,3 +150,66 @@ def parse_isodatetime(time_string: str) -> datetime:
if not time_string.endswith("Z"):
raise ValueError(f"{time_string} does not end with Z")
return datetime.fromisoformat(time_string[:-1] + "+00:00")


def parse_timedelta(text: str) -> timedelta:
    """Parse a string into a `datetime.timedelta`.

    Expects a string consisting of one or more sequences of numbers and
    duration abbreviations, separated by optional whitespace. Whitespace at
    the beginning and end of the string is ignored. The supported
    abbreviations are:

    - Week: ``weeks``, ``week``, ``w``
    - Day: ``days``, ``day``, ``d``
    - Hour: ``hours``, ``hour``, ``hr``, ``h``
    - Minute: ``minutes``, ``minute``, ``mins``, ``min``, ``m``
    - Second: ``seconds``, ``second``, ``secs``, ``sec``, ``s``

    If several are present, they must be given in the above order. Example
    valid strings are ``8d`` (8 days), ``4h 3minutes`` (four hours and three
    minutes), and ``5w4d`` (five weeks and four days).

    This function can be used as a before-mode validator for Pydantic
    `~datetime.timedelta` fields, replacing Pydantic's default ISO 8601
    duration support.

    Parameters
    ----------
    text
        Input string.

    Returns
    -------
    datetime.timedelta
        Converted `datetime.timedelta`.

    Raises
    ------
    ValueError
        Raised if the string is not in a valid format, contains no duration
        components at all (for example, an empty string), or describes a
        duration too large to represent as a `datetime.timedelta`.

    Examples
    --------
    To accept a `~datetime.timedelta` in this format in a Pydantic model, use
    a Pydantic field validator such as the following:

    .. code-block:: python

       @field_validator("lifetime", mode="before")
       @classmethod
       def _validate_lifetime(
           cls, v: str | float | timedelta
       ) -> float | timedelta:
           if not isinstance(v, str):
               return v
           return parse_timedelta(v)

    This will disable the Pydantic support for ISO 8601 durations and expect
    the format parsed by this function instead.
    """
    m = _TIMEDELTA_PATTERN.match(text.strip())
    if m is None:
        raise ValueError(f"Could not parse {text!r} as a time duration")

    # The named groups of the pattern match the keyword arguments of
    # timedelta, so the components that were present can be passed directly.
    td_args = {k: int(v) for k, v in m.groupdict().items() if v is not None}

    # Reject input that supplied no components at all (such as an empty
    # string) rather than silently returning a zero-length duration.
    if not td_args:
        raise ValueError(f"Could not parse {text!r} as a time duration")

    # timedelta raises OverflowError for values outside its supported range.
    # Convert that to the documented ValueError so that callers, including
    # Pydantic validators, see a consistent exception type.
    try:
        return timedelta(**td_args)
    except OverflowError as e:
        raise ValueError(f"Time duration {text!r} is out of range") from e
22 changes: 22 additions & 0 deletions tests/datetime_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
format_datetime_for_logging,
isodatetime,
parse_isodatetime,
parse_timedelta,
)


Expand Down Expand Up @@ -84,3 +85,24 @@ class Test(BaseModel):
json_model = Test(time=now).model_dump_json()
model = Test.model_validate_json(json_model)
assert format_datetime_for_logging(model.time) == expected


def test_parse_timdelta() -> None:
    """Check accepted and rejected inputs for ``parse_timedelta``."""
    # Input string mapped to the duration it is expected to produce.
    accepted = {
        "8d": timedelta(days=8),
        "4h 3minutes\n": timedelta(hours=4, minutes=3),
        "\n 5w4d": timedelta(weeks=5, days=4),
        "2weeks 2days 2hours 2minutes 2seconds": timedelta(
            weeks=2, days=2, hours=2, minutes=2, seconds=2
        ),
        "1week 1day 1hour 1minute 1second": timedelta(
            weeks=1, days=1, hours=1, minutes=1, seconds=1
        ),
        "4hr 5mins 6secs": timedelta(hours=4, minutes=5, seconds=6),
        "17min 65sec": timedelta(minutes=17, seconds=65),
    }
    for duration, expected in accepted.items():
        assert parse_timedelta(duration) == expected

    # Units given out of order and unsupported units must both be rejected.
    for rejected in ("3s4m", "4weeks 36ms"):
        with pytest.raises(ValueError, match="Could not parse"):
            parse_timedelta(rejected)

0 comments on commit f68a6e0

Please sign in to comment.