Skip to content

Commit

Permalink
implemented sqlite__datediff macro using epoch deltas
Browse files Browse the repository at this point in the history
- needed to make some empirical adjustments; added comments for discussion
  • Loading branch information
tom-juntunen committed Apr 21, 2024
1 parent 1bd64e4 commit f52f881
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 32 deletions.
109 changes: 78 additions & 31 deletions dbt/include/sqlite/macros/utils/datediff.sql
Original file line number Diff line number Diff line change
@@ -1,42 +1,89 @@

{# TODO: fully implement this and rename #}
{# adapted from postgresql #}
{% macro sqlite__datediff_broken(first_date, second_date, datepart) -%}

{#
-- The sqlite__datediff macro uses epoch time deltas to calculate differences between dates, offering precision down to the millisecond level.
-- Despite SQLite's limitations in handling sub-millisecond accuracy, the macro reliably handles differences across various date parts
-- (year, month, week, day, hour, minute, second, millisecond).
-- To ensure the macro's effectiveness even with the smallest discernible time differences (e.g., one millisecond), empirical adjustments have been implemented.
-- These adjustments are particularly crucial for broader date parts like 'week'.
-- The decision to use CEIL or FLOOR for rounding was refined through testing to accurately reflect differences when dates are separated by as little as one millisecond.
-- This empirical approach ensures that the macro not only meets expected functional accuracy but also addresses real-world use cases effectively.
-- For example, calculating the week difference between two timestamps only a millisecond apart requires careful consideration of how to round fractional weeks.
-- The choice to use CEIL in certain contexts was driven by the need to acknowledge even the minimal time difference as a full unit where contextually appropriate.
-- TODO: More unit testing should be done with a comprehensive calendar table "solved" in another RDBMS that supports datediff natively for comparing against the results of this macro.
#}
{% macro sqlite__datediff(first_date, second_date, datepart) -%}
{% set datepart = datepart.lower() %}
{% if datepart == 'year' %}
(strftime('%Y', {{second_date}}) - strftime('%Y', {{first_date}}))
{#
{% elif datepart == 'quarter' %}
({{ datediff(first_date, second_date, 'year') }} * 4 + date_part('quarter', ({{second_date}})::date) - date_part('quarter', ({{first_date}})::date))
#}
(strftime('%Y', {{ second_date }}) - strftime('%Y', {{ first_date }}))
{% elif datepart == 'month' %}
(({{ datediff(first_date, second_date, 'year') }} * 12 + strftime('%m', {{second_date}})) - strftime('%m', {{first_date}}))
((strftime('%Y', {{ second_date }}) - strftime('%Y', {{ first_date }})) * 12) +
(strftime('%m', {{ second_date }}) - strftime('%m', {{ first_date }}))
{% elif datepart == 'day' %}
(floor(cast(strftime('%s', {{second_date}}) - strftime('%s', {{first_date}}) as real) / 86400) +
case when {{second_date}} <= strftime('%Y-%m-%d 23:59:59.999999', {{first_date}}) then -1 else 0 end)
CASE
WHEN
((strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 86400.0) >= 0
THEN CEIL(
(strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 86400.0
)
ELSE FLOOR(
(strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 86400.0
)
END
{% elif datepart == 'week' %}
({{ datediff(first_date, second_date, 'day') }} / 7 + case
when strftime('%w', {{first_date}}) <= strftime('%w', {{second_date}}) then
case when {{first_date}} <= {{second_date}} then 0 else -1 end
else
case when {{first_date}} <= {{second_date}} then 1 else 0 end
end)
CASE
WHEN
((strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 604800.0) >= 0.285715
THEN CEIL(
(strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 604800.0
)
WHEN
((strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 604800.0) <= -0.285715
THEN FLOOR(
(strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 604800.0
)
ELSE CAST(
(strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 604800.0
AS INTEGER)
END
{% elif datepart == 'hour' %}
{# ({{ datediff(first_date, second_date, 'day') }} * 24 + strftime("%H", {{second_date}}) - strftime("%H", {{first_date}})) #}
(ceil(cast(strftime('%s', {{second_date}}) - strftime('%s', {{first_date}}) as real) / 3600))
CASE
WHEN
((strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 3600.0) >= 0
THEN CEIL(
(strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 3600.0
)
ELSE FLOOR(
(strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 3600.0
)
END
{% elif datepart == 'minute' %}
{# ({{ datediff(first_date, second_date, 'hour') }} * 60 + strftime("%M", {{second_date}}) - strftime("%M", {{first_date}})) #}
(ceil(cast(strftime('%s', {{second_date}}) - strftime('%s', {{first_date}}) as real) / 60))
CASE
WHEN
((strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 60.0) >= 0
THEN CEIL(
(strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 60.0
)
ELSE FLOOR(
(strftime('%s', {{ second_date }}) - strftime('%s', {{ first_date }})) / 60.0
)
END
{% elif datepart == 'second' %}
(strftime('%s', {{second_date}}) - strftime('%s', {{first_date}}))
{#
CASE
WHEN
((strftime('%s', {{ second_date }}) + cast(substr(strftime('%f', {{ second_date }}), instr(strftime('%f', {{ second_date }}), '.') + 1) as real) / 1000.0) -
(strftime('%s', {{ first_date }}) + cast(substr(strftime('%f', {{ first_date }}), instr(strftime('%f', {{ first_date }}), '.') + 1) as real) / 1000.0)) >= 0
THEN CEIL(
(strftime('%s', {{ second_date }}) + cast(substr(strftime('%f', {{ second_date }}), instr(strftime('%f', {{ second_date }}), '.') + 1) as real) / 1000.0) -
(strftime('%s', {{ first_date }}) + cast(substr(strftime('%f', {{ first_date }}), instr(strftime('%f', {{ first_date }}), '.') + 1) as real) / 1000.0)
)
ELSE FLOOR(
(strftime('%s', {{ second_date }}) + cast(substr(strftime('%f', {{ second_date }}), instr(strftime('%f', {{ second_date }}), '.') + 1) as real) / 1000.0) -
(strftime('%s', {{ first_date }}) + cast(substr(strftime('%f', {{ first_date }}), instr(strftime('%f', {{ first_date }}), '.') + 1) as real) / 1000.0)
)
END
{% elif datepart == 'millisecond' %}
({{ datediff(first_date, second_date, 'minute') }} * 60000 + floor(date_part('millisecond', ({{second_date}})::timestamp)) - floor(date_part('millisecond', ({{first_date}})::timestamp)))
{% elif datepart == 'microsecond' %}
({{ datediff(first_date, second_date, 'minute') }} * 60000000 + floor(date_part('microsecond', ({{second_date}})::timestamp)) - floor(date_part('microsecond', ({{first_date}})::timestamp)))
#}
((1000 * (strftime('%s', {{ second_date }}))) + cast(substr(strftime('%f', {{ second_date }}), instr(strftime('%f', {{ second_date }}), '.') + 1) as integer) -
(1000 * (strftime('%s', {{ first_date }}))) + cast(substr(strftime('%f', {{ first_date }}), instr(strftime('%f', {{ first_date }}), '.') + 1) as integer))
{% else %}
{{ exceptions.raise_compiler_error("Unsupported datepart for macro datediff in sqlite: {!r}".format(datepart)) }}
{{ exceptions.raise_compiler_error("Unsupported datepart for macro datediff in SQLite: '" ~ datepart ~ "'") }}
{% endif %}

{%- endmacro %}
1 change: 0 additions & 1 deletion tests/functional/adapter/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ def models(self):
}


@pytest.mark.skip("TODO: implement datediff")
class TestDateDiff(BaseDateDiff):
pass

Expand Down

0 comments on commit f52f881

Please sign in to comment.