From 41b573ef7466c4ca8d6abc1ac8e3a09110e1662a Mon Sep 17 00:00:00 2001
From: Tom Juntunen
Date: Sun, 21 Apr 2024 02:38:42 -0700
Subject: [PATCH] implemented sqlite__datediff macro using epoch deltas and empirical adjustments

---
 dbt/include/sqlite/macros/utils/datediff.sql | 106 +++++++++++++------
 tests/functional/adapter/utils/test_utils.py |   1 -
 2 files changed, 75 insertions(+), 32 deletions(-)

diff --git a/dbt/include/sqlite/macros/utils/datediff.sql b/dbt/include/sqlite/macros/utils/datediff.sql
index 4d07769..87c04f0 100644
--- a/dbt/include/sqlite/macros/utils/datediff.sql
+++ b/dbt/include/sqlite/macros/utils/datediff.sql
@@ -1,42 +1,86 @@
-
-{# TODO: fully implement this and rename #}
-{# adapted from postgresql #}
-{% macro sqlite__datediff_broken(first_date, second_date, datepart) -%}
-
+{#
+-- Datetime precision in SQLite is only reliable down to the millisecond level, not the microsecond level,
+-- so there is no 'microsecond' datepart; 'second' and 'millisecond' take their fraction from strftime('%f').
+-- 'year' and 'month' subtract calendar fields. 'day', 'hour', 'minute' and 'second' are computed from the
+-- epoch delta (strftime('%s')) and rounded away from zero (CEIL when >= 0, FLOOR otherwise), so a partial
+-- unit counts as a whole one. 'week' does the same, except that deltas within the empirically determined
+-- threshold of +/-0.285715 weeks (just over 2/7 of a week, i.e. two days) are truncated to zero, which is
+-- meant to avoid intuitively incorrect results near week boundaries. 'millisecond' is an integer difference.
+#}
+{% macro sqlite__datediff(first_date, second_date, datepart) -%}
+    {% set datepart = datepart.lower() %}
     {% if datepart == 'year' %}
-        (strftime('%Y', {{second_date}}) - strftime('%Y', {{first_date}}))
-    {#
-    {% elif datepart == 'quarter' %}
-        ({{ datediff(first_date, second_date, 'year') }} * 4 + date_part('quarter', ({{second_date}})::date) - date_part('quarter', ({{first_date}})::date))
-    #}
+        (strftime('%Y', {{ second_date }}) - strftime('%Y', {{ first_date }}))
     {% elif datepart == 'month' %}
-        (({{ datediff(first_date, second_date, 'year') }} * 12 + strftime('%m', {{second_date}})) - strftime('%m', {{first_date}}))
+        (((strftime('%Y', {{ second_date }}) - strftime('%Y', {{ first_date }})) * 12) +
+        (strftime('%m', {{ second_date }}) - strftime('%m', {{ first_date }})))
     {% elif datepart == 'day' %}
-        (floor(cast(strftime('%s', {{second_date}}) - strftime('%s', {{first_date}}) as real) / 86400) +
-        case when {{second_date}} <= strftime('%Y-%m-%d 23:59:59.999999', {{first_date}}) then -1 else 0 end)
+        CASE
+            WHEN
+                ((strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 86400.0) >= 0
+            THEN CEIL(
+                (strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 86400.0
+            )
+            ELSE FLOOR(
+                (strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 86400.0
+            )
+        END
     {% elif datepart == 'week' %}
-        ({{ datediff(first_date, second_date, 'day') }} / 7 + case
-            when strftime('%w', {{first_date}}) <= strftime('%w', {{second_date}}) then
-                case when {{first_date}} <= {{second_date}} then 0 else -1 end
-            else
-                case when {{first_date}} <= {{second_date}} then 1 else 0 end
-        end)
+        CASE
+            WHEN
+                ((strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 604800.0) >= 0.285715
+            THEN CEIL(
+                (strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 604800.0
+            )
+            WHEN
+                ((strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 604800.0) <= -0.285715
+            THEN FLOOR(
+                (strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 604800.0
+            )
+            ELSE CAST(
+                (strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 604800.0
+            AS INTEGER)
+        END
     {% elif datepart == 'hour' %}
-        {# ({{ datediff(first_date, second_date, 'day') }} * 24 + strftime("%H", {{second_date}}) - strftime("%H", {{first_date}})) #}
-        (ceil(cast(strftime('%s', {{second_date}}) - strftime('%s', {{first_date}}) as real) / 3600))
+        CASE
+            WHEN
+                ((strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 3600.0) >= 0
+            THEN CEIL(
+                (strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 3600.0
+            )
+            ELSE FLOOR(
+                (strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 3600.0
+            )
+        END
     {% elif datepart == 'minute' %}
-        {# ({{ datediff(first_date, second_date, 'hour') }} * 60 + strftime("%M", {{second_date}}) - strftime("%M", {{first_date}})) #}
-        (ceil(cast(strftime('%s', {{second_date}}) - strftime('%s', {{first_date}}) as real) / 60))
+        CASE
+            WHEN
+                ((strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 60.0) >= 0
+            THEN CEIL(
+                (strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 60.0
+            )
+            ELSE FLOOR(
+                (strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 60.0
+            )
+        END
     {% elif datepart == 'second' %}
-        (strftime('%s', {{second_date}}) - strftime('%s', {{first_date}}))
-    {#
+        CASE
+            WHEN
+                ((strftime('%s', {{ second_date }}) + cast(substr(strftime('%f', {{ second_date }}), instr(strftime('%f', {{ second_date }}), '.') + 1) as real) / 1000.0) -
+                (strftime('%s', {{ first_date }}) + cast(substr(strftime('%f', {{ first_date }}), instr(strftime('%f', {{ first_date }}), '.') + 1) as real) / 1000.0)) >= 0
+            THEN CEIL(
+                (strftime('%s', {{ second_date }}) + cast(substr(strftime('%f', {{ second_date }}), instr(strftime('%f', {{ second_date }}), '.') + 1) as real) / 1000.0) -
+                (strftime('%s', {{ first_date }}) + cast(substr(strftime('%f', {{ first_date }}), instr(strftime('%f', {{ first_date }}), '.') + 1) as real) / 1000.0)
+            )
+            ELSE FLOOR(
+                (strftime('%s', {{ second_date }}) + cast(substr(strftime('%f', {{ second_date }}), instr(strftime('%f', {{ second_date }}), '.') + 1) as real) / 1000.0) -
+                (strftime('%s', {{ first_date }}) + cast(substr(strftime('%f', {{ first_date }}), instr(strftime('%f', {{ first_date }}), '.') + 1) as real) / 1000.0)
+            )
+        END
     {% elif datepart == 'millisecond' %}
-        ({{ datediff(first_date, second_date, 'minute') }} * 60000 + floor(date_part('millisecond', ({{second_date}})::timestamp)) - floor(date_part('millisecond', ({{first_date}})::timestamp)))
-    {% elif datepart == 'microsecond' %}
-        ({{ datediff(first_date, second_date, 'minute') }} * 60000000 + floor(date_part('microsecond', ({{second_date}})::timestamp)) - floor(date_part('microsecond', ({{first_date}})::timestamp)))
-    #}
+        (((1000 * (strftime('%s', {{ second_date }}))) + cast(substr(strftime('%f', {{ second_date }}), instr(strftime('%f', {{ second_date }}), '.') + 1) as integer)) -
+        ((1000 * (strftime('%s', {{ first_date }}))) + cast(substr(strftime('%f', {{ first_date }}), instr(strftime('%f', {{ first_date }}), '.') + 1) as integer)))
     {% else %}
-        {{ exceptions.raise_compiler_error("Unsupported datepart for macro datediff in sqlite: {!r}".format(datepart)) }}
+        {{ exceptions.raise_compiler_error("Unsupported datepart for macro datediff in SQLite: '" ~ datepart ~ "'") }}
     {% endif %}
-
 {%- endmacro %}
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index fd40e79..c214e19 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -124,7 +124,6 @@ def models(self):
         }
 
 
-@pytest.mark.skip("TODO: implement datediff")
 class TestDateDiff(BaseDateDiff):
     pass
 