Skip to content

Commit

Permalink
Fix date_diff() bug across time zone boundaries (#11053)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #11053

Date diff calculation for timestamp with time zone types need to be
performed in the time zone of the first parameter to follow Presto's semantic.
Timestamp calculation normalized to UTC may not return the same result if the
interval crosses a daylight savings time.

Reviewed By: amitkdutta, kgpai

Differential Revision: D63159202

fbshipit-source-id: fb8eaf10dc6e8ab14e8eb635ff64ce2be4939823
  • Loading branch information
pedroerp authored and facebook-github-bot committed Sep 22, 2024
1 parent 00e65bc commit 4e45bc5
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 38 deletions.
25 changes: 15 additions & 10 deletions velox/functions/prestosql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -1375,9 +1375,8 @@ struct DateDiffFunction : public TimestampWithTimezoneSupport<T> {
const arg_type<Varchar>& unitString,
const arg_type<Timestamp>& timestamp1,
const arg_type<Timestamp>& timestamp2) {
const auto unit = unit_.has_value()
? unit_.value()
: fromDateTimeUnitString(unitString, true /*throwIfInvalid*/).value();
const auto unit = unit_.value_or(
fromDateTimeUnitString(unitString, true /*throwIfInvalid*/).value());

if (LIKELY(sessionTimeZone_ != nullptr)) {
// sessionTimeZone not null means that the config
Expand Down Expand Up @@ -1416,13 +1415,19 @@ struct DateDiffFunction : public TimestampWithTimezoneSupport<T> {
FOLLY_ALWAYS_INLINE void call(
int64_t& result,
const arg_type<Varchar>& unitString,
const arg_type<TimestampWithTimezone>& timestamp1,
const arg_type<TimestampWithTimezone>& timestamp2) {
call(
result,
unitString,
this->toTimestamp(timestamp1, true),
this->toTimestamp(timestamp2, true));
const arg_type<TimestampWithTimezone>& timestampWithTz1,
const arg_type<TimestampWithTimezone>& timestampWithTz2) {
const auto unit = unit_.value_or(
fromDateTimeUnitString(unitString, true /*throwIfInvalid*/).value());

// Presto's behavior is to use the time zone of the first parameter to
// perform the calculation. Note that always normalizing to UTC is not
// correct as calculations may cross daylight savings boundaries.
auto timestamp1 = this->toTimestamp(timestampWithTz1, false);
auto timestamp2 = this->toTimestamp(timestampWithTz2, true);
timestamp2.toTimezone(*tz::locateZone(unpackZoneKeyId(timestampWithTz1)));

result = diffTimestamp(unit, timestamp1, timestamp2);
}
};

Expand Down
112 changes: 84 additions & 28 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2804,62 +2804,118 @@ TEST_F(DateTimeFunctionsTest, dateDiffTimestamp) {
}

TEST_F(DateTimeFunctionsTest, dateDiffTimestampWithTimezone) {
const auto dateDiff = [&](const std::string& unit,
std::optional<int64_t> timestamp1,
const std::optional<std::string>& timeZoneName1,
std::optional<int64_t> timestamp2,
const std::optional<std::string>& timeZoneName2) {
auto ts1 = (timestamp1.has_value() && timeZoneName1.has_value())
? makeTimestampWithTimeZoneVector(
timestamp1.value(), timeZoneName1.value().c_str())
: BaseVector::createNullConstant(TIMESTAMP_WITH_TIME_ZONE(), 1, pool());
auto ts2 = (timestamp2.has_value() && timeZoneName2.has_value())
? makeTimestampWithTimeZoneVector(
timestamp2.value(), timeZoneName2.value().c_str())
: BaseVector::createNullConstant(TIMESTAMP_WITH_TIME_ZONE(), 1, pool());

const auto dateDiff = [&](std::optional<std::string> unit,
std::optional<TimestampWithTimezone> input1,
std::optional<TimestampWithTimezone> input2) {
return evaluateOnce<int64_t>(
fmt::format("date_diff('{}', c0, c1)", unit),
makeRowVector({ts1, ts2}));
"date_diff(c0, c1, c2)",
{VARCHAR(), TIMESTAMP_WITH_TIME_ZONE(), TIMESTAMP_WITH_TIME_ZONE()},
unit,
TimestampWithTimezone::pack(input1),
TimestampWithTimezone::pack(input2));
};

// Null behavior.
EXPECT_EQ(
std::nullopt,
dateDiff(
std::nullopt,
TimestampWithTimezone(0, 0),
TimestampWithTimezone(0, 0)));

EXPECT_EQ(
std::nullopt,
dateDiff("asdf", std::nullopt, TimestampWithTimezone(0, 0)));

// timestamp1: 1970-01-01 00:00:00.000 +00:00 (0)
// timestamp2: 2020-08-25 16:30:10.123 -08:00 (1'598'373'010'123)
EXPECT_EQ(
1598373010123,
dateDiff(
"millisecond",
0,
"+00:00",
1'598'373'010'123,
"America/Los_Angeles"));
TimestampWithTimezone(0, "+00:00"),
TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles")));
EXPECT_EQ(
1598373010,
dateDiff(
"second", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles"));
"second",
TimestampWithTimezone(0, "+00:00"),
TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles")));
EXPECT_EQ(
26639550,
dateDiff(
"minute", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles"));
"minute",
TimestampWithTimezone(0, "+00:00"),
TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles")));
EXPECT_EQ(
443992,
dateDiff("hour", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles"));
dateDiff(
"hour",
TimestampWithTimezone(0, "+00:00"),
TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles")));
EXPECT_EQ(
18499,
dateDiff("day", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles"));
dateDiff(
"day",
TimestampWithTimezone(0, "+00:00"),
TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles")));
EXPECT_EQ(
2642,
dateDiff("week", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles"));
dateDiff(
"week",
TimestampWithTimezone(0, "+00:00"),
TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles")));
EXPECT_EQ(
607,
dateDiff("month", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles"));
dateDiff(
"month",
TimestampWithTimezone(0, "+00:00"),
TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles")));
EXPECT_EQ(
202,
dateDiff(
"quarter", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles"));
"quarter",
TimestampWithTimezone(0, "+00:00"),
TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles")));
EXPECT_EQ(
50,
dateDiff("year", 0, "+00:00", 1'598'373'010'123, "America/Los_Angeles"));
dateDiff(
"year",
TimestampWithTimezone(0, "+00:00"),
TimestampWithTimezone(1'598'373'010'123, "America/Los_Angeles")));

// Test if calculations are being performed in correct zone. Presto behavior
// is to use the zone of the first parameter. Note that that this UTC interval
// (a, b) crosses a daylight savings boundary in PST when PST loses one hour.
// So whenever the calculation is performed in PST, the interval is
// effectively smaller than 24h and returns zero.
auto a = parseTimestamp("2024-11-02 17:00:00").toMillis();
auto b = parseTimestamp("2024-11-03 17:30:00").toMillis();
EXPECT_EQ(
1,
dateDiff(
"day",
TimestampWithTimezone(a, "UTC"),
TimestampWithTimezone(b, "America/Los_Angeles")));
EXPECT_EQ(
1,
dateDiff(
"day",
TimestampWithTimezone(a, "UTC"),
TimestampWithTimezone(b, "UTC")));

EXPECT_EQ(
0,
dateDiff(
"day",
TimestampWithTimezone(a, "America/Los_Angeles"),
TimestampWithTimezone(b, "UTC")));
EXPECT_EQ(
0,
dateDiff(
"day",
TimestampWithTimezone(a, "America/Los_Angeles"),
TimestampWithTimezone(b, "America/Los_Angeles")));
}

TEST_F(DateTimeFunctionsTest, parseDatetime) {
Expand Down

0 comments on commit 4e45bc5

Please sign in to comment.