From 4e45bc51fc164531370abcd9f28201b694623cfe Mon Sep 17 00:00:00 2001 From: Pedro Eugenio Rocha Pedreira Date: Sun, 22 Sep 2024 15:18:42 -0700 Subject: [PATCH] Fix date_diff() bug across time zone boundaries (#11053) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/11053 Date diff calculation for timestamp with time zone types need to be performed in the time zone of the first parameter to follow Presto's semantic. Timestamp calculation normalized to UTC may not return the same result if the interval crosses a daylight savings time. Reviewed By: amitkdutta, kgpai Differential Revision: D63159202 fbshipit-source-id: fb8eaf10dc6e8ab14e8eb635ff64ce2be4939823 --- velox/functions/prestosql/DateTimeFunctions.h | 25 ++-- .../prestosql/tests/DateTimeFunctionsTest.cpp | 112 +++++++++++++----- 2 files changed, 99 insertions(+), 38 deletions(-) diff --git a/velox/functions/prestosql/DateTimeFunctions.h b/velox/functions/prestosql/DateTimeFunctions.h index a2ebf5dcc957..7672a2184bcb 100644 --- a/velox/functions/prestosql/DateTimeFunctions.h +++ b/velox/functions/prestosql/DateTimeFunctions.h @@ -1375,9 +1375,8 @@ struct DateDiffFunction : public TimestampWithTimezoneSupport { const arg_type& unitString, const arg_type& timestamp1, const arg_type& timestamp2) { - const auto unit = unit_.has_value() - ? unit_.value() - : fromDateTimeUnitString(unitString, true /*throwIfInvalid*/).value(); + const auto unit = unit_.value_or( + fromDateTimeUnitString(unitString, true /*throwIfInvalid*/).value()); if (LIKELY(sessionTimeZone_ != nullptr)) { // sessionTimeZone not null means that the config @@ -1416,13 +1415,19 @@ struct DateDiffFunction : public TimestampWithTimezoneSupport { FOLLY_ALWAYS_INLINE void call( int64_t& result, const arg_type& unitString, - const arg_type& timestamp1, - const arg_type& timestamp2) { - call( - result, - unitString, - this->toTimestamp(timestamp1, true), - this->toTimestamp(timestamp2, true)); + const arg_type& timestampWithTz1, + const arg_type& timestampWithTz2) { + const auto unit = unit_.value_or( + fromDateTimeUnitString(unitString, true /*throwIfInvalid*/).value()); + + // Presto's behavior is to use the time zone of the first parameter to + // perform the calculation. Note that always normalizing to UTC is not + // correct as calculations may cross daylight savings boundaries. + auto timestamp1 = this->toTimestamp(timestampWithTz1, false); + auto timestamp2 = this->toTimestamp(timestampWithTz2, true); + timestamp2.toTimezone(*tz::locateZone(unpackZoneKeyId(timestampWithTz1))); + + result = diffTimestamp(unit, timestamp1, timestamp2); } }; diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 879da92fd6e3..03e64f68b8c3 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -2804,62 +2804,118 @@ TEST_F(DateTimeFunctionsTest, dateDiffTimestamp) { } TEST_F(DateTimeFunctionsTest, dateDiffTimestampWithTimezone) { - const auto dateDiff = [&](const std::string& unit, - std::optional timestamp1, - const std::optional& timeZoneName1, - std::optional timestamp2, - const std::optional& timeZoneName2) { - auto ts1 = (timestamp1.has_value() && timeZoneName1.has_value()) - ? makeTimestampWithTimeZoneVector( - timestamp1.value(), timeZoneName1.value().c_str()) - : BaseVector::createNullConstant(TIMESTAMP_WITH_TIME_ZONE(), 1, pool()); - auto ts2 = (timestamp2.has_value() && timeZoneName2.has_value()) - ? makeTimestampWithTimeZoneVector( - timestamp2.value(), timeZoneName2.value().c_str()) - : BaseVector::createNullConstant(TIMESTAMP_WITH_TIME_ZONE(), 1, pool()); - + const auto dateDiff = [&](std::optional unit, + std::optional input1, + std::optional input2) { return evaluateOnce( - fmt::format("date_diff('{}', c0, c1)", unit), - makeRowVector({ts1, ts2})); + "date_diff(c0, c1, c2)", + {VARCHAR(), TIMESTAMP_WITH_TIME_ZONE(), TIMESTAMP_WITH_TIME_ZONE()}, + unit, + TimestampWithTimezone::pack(input1), + TimestampWithTimezone::pack(input2)); }; + // Null behavior. + EXPECT_EQ( + std::nullopt, + dateDiff( + std::nullopt, + TimestampWithTimezone(0, 0), + TimestampWithTimezone(0, 0))); + + EXPECT_EQ( + std::nullopt, + dateDiff("asdf", std::nullopt, TimestampWithTimezone(0, 0))); + // timestamp1: 1970-01-01 00:00:00.000 +00:00 (0) // timestamp2: 2020-08-25 16:30:10.123 -08:00 (1'598'373'010'123) EXPECT_EQ( 1598373010123, dateDiff( "millisecond", - 0, - "+00:00", - 1'598'373'010'123, - "America/Los_Angeles")); + TimestampWithTimezone(0, "+00:00"), + TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles"))); EXPECT_EQ( 1598373010, dateDiff( - "second", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles")); + "second", + TimestampWithTimezone(0, "+00:00"), + TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles"))); EXPECT_EQ( 26639550, dateDiff( - "minute", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles")); + "minute", + TimestampWithTimezone(0, "+00:00"), + TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles"))); EXPECT_EQ( 443992, - dateDiff("hour", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles")); + dateDiff( + "hour", + TimestampWithTimezone(0, "+00:00"), + TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles"))); EXPECT_EQ( 18499, - dateDiff("day", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles")); + dateDiff( + "day", + TimestampWithTimezone(0, "+00:00"), + TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles"))); EXPECT_EQ( 2642, - dateDiff("week", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles")); + dateDiff( + "week", + TimestampWithTimezone(0, "+00:00"), + TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles"))); EXPECT_EQ( 607, - dateDiff("month", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles")); + dateDiff( + "month", + TimestampWithTimezone(0, "+00:00"), + TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles"))); EXPECT_EQ( 202, dateDiff( - "quarter", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles")); + "quarter", + TimestampWithTimezone(0, "+00:00"), + TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles"))); EXPECT_EQ( 50, - dateDiff("year", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles")); + dateDiff( + "year", + TimestampWithTimezone(0, "+00:00"), + TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles"))); + + // Test if calculations are being performed in correct zone. Presto behavior + // is to use the zone of the first parameter. Note that that this UTC interval + // (a, b) crosses a daylight savings boundary in PST when PST loses one hour. + // So whenever the calculation is performed in PST, the interval is + // effectively smaller than 24h and returns zero. + auto a = parseTimestamp("2024-11-02 17:00:00").toMillis(); + auto b = parseTimestamp("2024-11-03 17:30:00").toMillis(); + EXPECT_EQ( + 1, + dateDiff( + "day", + TimestampWithTimezone(a, "UTC"), + TimestampWithTimezone(b, "America/Los_Angeles"))); + EXPECT_EQ( + 1, + dateDiff( + "day", + TimestampWithTimezone(a, "UTC"), + TimestampWithTimezone(b, "UTC"))); + + EXPECT_EQ( + 0, + dateDiff( + "day", + TimestampWithTimezone(a, "America/Los_Angeles"), + TimestampWithTimezone(b, "UTC"))); + EXPECT_EQ( + 0, + dateDiff( + "day", + TimestampWithTimezone(a, "America/Los_Angeles"), + TimestampWithTimezone(b, "America/Los_Angeles"))); } TEST_F(DateTimeFunctionsTest, parseDatetime) {